linux-2.6-microblaze.git / drivers/misc/habanalabs/gaudi/gaudi.c (commit 96020693ac29917e723b8e91f9e2ce4db4ca9c91)
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse the CB, but WREG should be
43  *                      allowed because of TDMA (tensor DMA). Hence, WREG is
44  *                      never secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61
62 #define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000         /* 4s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN            20
86
87 #define GAUDI_CB_POOL_CB_CNT            512
88 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
97
98 #define GAUDI_ARB_WDT_TIMEOUT           0xEE6b27FF /* 8 seconds */
99
100 #define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
101
102 #define BIN_REG_STRING_SIZE     sizeof("0b10101010101010101010101010101010")
103
104 #define MONITOR_SOB_STRING_SIZE         256
105
106 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
107         GAUDI_QUEUE_ID_DMA_0_0,
108         GAUDI_QUEUE_ID_DMA_0_1,
109         GAUDI_QUEUE_ID_DMA_0_2,
110         GAUDI_QUEUE_ID_DMA_0_3,
111         GAUDI_QUEUE_ID_DMA_1_0,
112         GAUDI_QUEUE_ID_DMA_1_1,
113         GAUDI_QUEUE_ID_DMA_1_2,
114         GAUDI_QUEUE_ID_DMA_1_3
115 };
116
117 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
118                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
119                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
120                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
121                 "gaudi cpu eq"
122 };
123
124 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
125         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
126         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
127         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
128         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
129         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
130         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
131         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
132         [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
133 };
134
135 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
136         [0] = GAUDI_QUEUE_ID_DMA_0_0,
137         [1] = GAUDI_QUEUE_ID_DMA_0_1,
138         [2] = GAUDI_QUEUE_ID_DMA_0_2,
139         [3] = GAUDI_QUEUE_ID_DMA_0_3,
140         [4] = GAUDI_QUEUE_ID_DMA_1_0,
141         [5] = GAUDI_QUEUE_ID_DMA_1_1,
142         [6] = GAUDI_QUEUE_ID_DMA_1_2,
143         [7] = GAUDI_QUEUE_ID_DMA_1_3,
144 };
145
146 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
147         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
148         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
149         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
150         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
151         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
152         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
153         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
154         [PACKET_FENCE]          = sizeof(struct packet_fence),
155         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
156         [PACKET_NOP]            = sizeof(struct packet_nop),
157         [PACKET_STOP]           = sizeof(struct packet_stop),
158         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
159         [PACKET_WAIT]           = sizeof(struct packet_wait),
160         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
161 };
162
163 static inline bool validate_packet_id(enum packet_id id)
164 {
165         switch (id) {
166         case PACKET_WREG_32:
167         case PACKET_WREG_BULK:
168         case PACKET_MSG_LONG:
169         case PACKET_MSG_SHORT:
170         case PACKET_CP_DMA:
171         case PACKET_REPEAT:
172         case PACKET_MSG_PROT:
173         case PACKET_FENCE:
174         case PACKET_LIN_DMA:
175         case PACKET_NOP:
176         case PACKET_STOP:
177         case PACKET_ARB_POINT:
178         case PACKET_WAIT:
179         case PACKET_LOAD_AND_EXE:
180                 return true;
181         default:
182                 return false;
183         }
184 }
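
/*
 * Illustrative sketch, for exposition only (not part of the original driver):
 * a packet's opcode can be recovered from its ctl dword with the
 * GAUDI_PKT_CTL_OPCODE_MASK field used elsewhere in this file, validated with
 * validate_packet_id() and sized via gaudi_packet_sizes[]. The helper name is
 * hypothetical and FIELD_GET() is assumed to be available through the
 * included headers.
 */
static inline u16 example_packet_size(u32 ctl)
{
        enum packet_id id = FIELD_GET(GAUDI_PKT_CTL_OPCODE_MASK, ctl);

        return validate_packet_id(id) ? gaudi_packet_sizes[id] : 0;
}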
185
186 static const char * const
187 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
188         "tpc_address_exceed_slm",
189         "tpc_div_by_0",
190         "tpc_spu_mac_overflow",
191         "tpc_spu_addsub_overflow",
192         "tpc_spu_abs_overflow",
193         "tpc_spu_fp_dst_nan_inf",
194         "tpc_spu_fp_dst_denorm",
195         "tpc_vpu_mac_overflow",
196         "tpc_vpu_addsub_overflow",
197         "tpc_vpu_abs_overflow",
198         "tpc_vpu_fp_dst_nan_inf",
199         "tpc_vpu_fp_dst_denorm",
200         "tpc_assertions",
201         "tpc_illegal_instruction",
202         "tpc_pc_wrap_around",
203         "tpc_qm_sw_err",
204         "tpc_hbw_rresp_err",
205         "tpc_hbw_bresp_err",
206         "tpc_lbw_rresp_err",
207         "tpc_lbw_bresp_err"
208 };
209
210 static const char * const
211 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
212         "PQ AXI HBW error",
213         "CQ AXI HBW error",
214         "CP AXI HBW error",
215         "CP error due to undefined OPCODE",
216         "CP encountered STOP OPCODE",
217         "CP AXI LBW error",
218         "CP WRREG32 or WRBULK returned error",
219         "N/A",
220         "FENCE 0 inc over max value and clipped",
221         "FENCE 1 inc over max value and clipped",
222         "FENCE 2 inc over max value and clipped",
223         "FENCE 3 inc over max value and clipped",
224         "FENCE 0 dec under min value and clipped",
225         "FENCE 1 dec under min value and clipped",
226         "FENCE 2 dec under min value and clipped",
227         "FENCE 3 dec under min value and clipped"
228 };
229
230 static const char * const
231 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
232         "Choice push while full error",
233         "Choice Q watchdog error",
234         "MSG AXI LBW returned with error"
235 };
236
237 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
238         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
239         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
240         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
241         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
242         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
243         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
244         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
245         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
246         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
247         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
248         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
249         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
250         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
251         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
252         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
253         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
254         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
255         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
256         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
257         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
258         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
259         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
304         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
305         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
306         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
307         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
308         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
309         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
310         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
311         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
312         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
313         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
314         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
315         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
316         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
317         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
318         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
319         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
320         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
321         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
322         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
323         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
324         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
325         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
326         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
327         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
328         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
329         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
330         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
331         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
332         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
333         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
334         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
335         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
336         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
337         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
338         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
339         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
340         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
341         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
342         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
343         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
344         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
345         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
346         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
347         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
348         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
349         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
350         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
351 };
352
353 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
354         { .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
355         { .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
356         { .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
357         { .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
358         { .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
359         { .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
360         { .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
361         { .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
362         { .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
363         { .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
364         { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
365         { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
366         { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
367         { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
368         { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
369         { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
370         { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
371         { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
372         { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
373         { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
374         { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
375         { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
376         { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
377         { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
378         { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
379         { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
380         { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
381 };
382
383 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
384         { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
385         { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
386         { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
387         { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
388         { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
389         { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
390         { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
391         { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
392         { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
393         { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
394         { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
395 };
396
397 static s64 gaudi_state_dump_specs_props[] = {
398         [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
399         [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
400         [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
401         [SP_MON_OBJ_WR_ADDR_LOW] =
402                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
403         [SP_MON_OBJ_WR_ADDR_HIGH] =
404                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
405         [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
406         [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
407         [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
408         [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
409         [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
410         [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
411         [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
412         [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
413         [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
414         [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
415         [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
416         [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
417         [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
418         [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
419         [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
420         [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
421         [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
422         [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
423         [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
424         [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
425         [SP_FENCE0_CNT_OFFSET] =
426                 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
427         [SP_FENCE0_RDATA_OFFSET] =
428                 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
429         [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
430         [SP_NUM_CORES] = 1,
431 };
432
433 static const int gaudi_queue_id_to_engine_id[] = {
434         [GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
435         [GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
436         [GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
437         [GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
438         [GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
439         [GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
440         [GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
441         [GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
442         [GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
443         [GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
444         [GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
445         [GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
446         [GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
447         [GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
448         [GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
449         [GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
450         [GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
451         [GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
452         [GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
453         [GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
454         [GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
455         [GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
456         [GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
457         [GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
458         [GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
459         [GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
460         [GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
461         [GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
462         [GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
463 };
464
465 /* The order here is opposite to the order of the indexing in the h/w.
466  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
467  */
468 static const char * const gaudi_sync_manager_names[] = {
469         "SYNC_MGR_E_N",
470         "SYNC_MGR_W_N",
471         "SYNC_MGR_E_S",
472         "SYNC_MGR_W_S",
473         NULL
474 };
475
476 struct ecc_info_extract_params {
477         u64 block_address;
478         u32 num_memories;
479         bool derr;
480 };
481
482 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
483                                                                 u64 phys_addr);
484 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
485                                         struct hl_cs_job *job);
486 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
487                                         u32 size, u64 val);
488 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
489                                         u32 num_regs, u32 val);
490 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
491                                 u32 tpc_id);
492 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
493 static int gaudi_cpucp_info_get(struct hl_device *hdev);
494 static void gaudi_disable_clock_gating(struct hl_device *hdev);
495 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
496 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
497                                 u32 size, bool eb);
498 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
499                                 struct hl_gen_wait_properties *prop);
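
/*
 * Illustrative sketch, for exposition only (not part of the original driver):
 * the "clear SRAM on context switch" use case described in the comment block
 * at the top of this file reduces to a single call to the
 * gaudi_memset_device_memory() helper declared above. The function name and
 * the exact size calculation are hypothetical simplifications.
 */
static inline int example_clear_user_sram(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;

        /* Zero the user-visible SRAM region through the secured DMA channel */
        return gaudi_memset_device_memory(hdev, prop->sram_user_base_address,
                        prop->sram_size - SRAM_USER_BASE_OFFSET, 0);
}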
500 static inline enum hl_collective_mode
501 get_collective_mode(struct hl_device *hdev, u32 queue_id)
502 {
503         if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
504                 return HL_COLLECTIVE_MASTER;
505
506         if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
507                         queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
508                 return HL_COLLECTIVE_SLAVE;
509
510         if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
511                         queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
512                 return HL_COLLECTIVE_SLAVE;
513
514         if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
515                         queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
516                 return HL_COLLECTIVE_SLAVE;
517
518         return HL_COLLECTIVE_NOT_SUPPORTED;
519 }
520
521 static inline void set_default_power_values(struct hl_device *hdev)
522 {
523         struct asic_fixed_properties *prop = &hdev->asic_prop;
524
525         if (hdev->card_type == cpucp_card_type_pmc) {
526                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
527
528                 if (prop->fw_security_enabled)
529                         prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
530                 else
531                         prop->dc_power_default = DC_POWER_DEFAULT_PMC;
532         } else {
533                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
534                 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
535         }
536 }
537
538 static int gaudi_set_fixed_properties(struct hl_device *hdev)
539 {
540         struct asic_fixed_properties *prop = &hdev->asic_prop;
541         u32 num_sync_stream_queues = 0;
542         int i;
543
544         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
545         prop->hw_queues_props = kcalloc(prop->max_queues,
546                         sizeof(struct hw_queue_properties),
547                         GFP_KERNEL);
548
549         if (!prop->hw_queues_props)
550                 return -ENOMEM;
551
552         for (i = 0 ; i < prop->max_queues ; i++) {
553                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
554                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
555                         prop->hw_queues_props[i].driver_only = 0;
556                         prop->hw_queues_props[i].supports_sync_stream = 1;
557                         prop->hw_queues_props[i].cb_alloc_flags =
558                                 CB_ALLOC_KERNEL;
559                         num_sync_stream_queues++;
560                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
561                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
562                         prop->hw_queues_props[i].driver_only = 1;
563                         prop->hw_queues_props[i].supports_sync_stream = 0;
564                         prop->hw_queues_props[i].cb_alloc_flags =
565                                 CB_ALLOC_KERNEL;
566                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
567                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
568                         prop->hw_queues_props[i].driver_only = 0;
569                         prop->hw_queues_props[i].supports_sync_stream = 0;
570                         prop->hw_queues_props[i].cb_alloc_flags =
571                                 CB_ALLOC_USER;
572
573                 }
574                 prop->hw_queues_props[i].collective_mode =
575                                                 get_collective_mode(hdev, i);
576         }
577
578         prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
579         prop->cfg_base_address = CFG_BASE;
580         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
581         prop->host_base_address = HOST_PHYS_BASE;
582         prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
583         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
584         prop->completion_mode = HL_COMPLETION_MODE_JOB;
585         prop->collective_first_sob = 0;
586         prop->collective_first_mon = 0;
587
588         /* 2 SOBs per internal queue stream are reserved for collective */
589         prop->sync_stream_first_sob =
590                         ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
591                         * QMAN_STREAMS * HL_RSVD_SOBS;
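        /*
         * Worked example with hypothetical values, for exposition only: if
         * NUMBER_OF_SOBS_IN_GRP were 11, HL_MAX_SOBS_PER_MONITOR 8,
         * QMAN_STREAMS 4 and HL_RSVD_SOBS 2, the expression above would give
         * ALIGN(11, 8) * 4 * 2 = 16 * 4 * 2 = 128, i.e. SOBs 0..127 would
         * remain reserved for the collective groups.
         */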
592
593         /* 1 monitor per internal queue stream is reserved for collective.
594          * 2 monitors per external queue stream are reserved for collective.
595          */
596         prop->sync_stream_first_mon =
597                         (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
598                         (NUMBER_OF_EXT_HW_QUEUES * 2);
599
600         prop->dram_base_address = DRAM_PHYS_BASE;
601         prop->dram_size = GAUDI_HBM_SIZE_32GB;
602         prop->dram_end_address = prop->dram_base_address + prop->dram_size;
603         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
604
605         prop->sram_base_address = SRAM_BASE_ADDR;
606         prop->sram_size = SRAM_SIZE;
607         prop->sram_end_address = prop->sram_base_address + prop->sram_size;
608         prop->sram_user_base_address =
609                         prop->sram_base_address + SRAM_USER_BASE_OFFSET;
610
611         prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
612         prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
613
614         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
615         if (hdev->pldm)
616                 prop->mmu_pgt_size = 0x800000; /* 8MB */
617         else
618                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
619         prop->mmu_pte_size = HL_PTE_SIZE;
620         prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
621         prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
622         prop->dram_page_size = PAGE_SIZE_2MB;
623         prop->device_mem_alloc_default_page_size = prop->dram_page_size;
624         prop->dram_supports_virtual_memory = false;
625
626         prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
627         prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
628         prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
629         prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
630         prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
631         prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
632         prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
633         prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
634         prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
635         prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
636         prop->pmmu.start_addr = VA_HOST_SPACE_START;
637         prop->pmmu.end_addr =
638                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
639         prop->pmmu.page_size = PAGE_SIZE_4KB;
640         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
641         prop->pmmu.last_mask = LAST_MASK;
642         /* TODO: will be duplicated until per-MMU props are implemented */
643         prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
644         prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
645
646         /* PMMU and HPMMU are the same except for the page size */
647         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
648         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
649
650         /* shifts and masks are the same in PMMU and DMMU */
651         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
652         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
653         prop->dmmu.end_addr = VA_HOST_SPACE_END;
654         prop->dmmu.page_size = PAGE_SIZE_2MB;
655
656         prop->cfg_size = CFG_SIZE;
657         prop->max_asid = MAX_ASID;
658         prop->num_of_events = GAUDI_EVENT_SIZE;
659         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
660
661         set_default_power_values(hdev);
662
663         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
664         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
665
666         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
667         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
668
669         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
670                                         CARD_NAME_MAX_LEN);
671
672         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
673
674         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
675                         prop->sync_stream_first_sob +
676                         (num_sync_stream_queues * HL_RSVD_SOBS);
677         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
678                         prop->sync_stream_first_mon +
679                         (num_sync_stream_queues * HL_RSVD_MONS);
680
681         prop->first_available_user_interrupt = USHRT_MAX;
682
683         for (i = 0 ; i < HL_MAX_DCORES ; i++)
684                 prop->first_available_cq[i] = USHRT_MAX;
685
686         prop->fw_cpu_boot_dev_sts0_valid = false;
687         prop->fw_cpu_boot_dev_sts1_valid = false;
688         prop->hard_reset_done_by_fw = false;
689         prop->gic_interrupts_enable = true;
690
691         prop->server_type = HL_SERVER_TYPE_UNKNOWN;
692
693         prop->clk_pll_index = HL_GAUDI_MME_PLL;
694         prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
695
696         prop->use_get_power_for_reset_history = true;
697
698         prop->configurable_stop_on_err = true;
699
700         prop->set_max_power_on_device_init = true;
701
702         prop->dma_mask = 48;
703
704         return 0;
705 }
706
707 static int gaudi_pci_bars_map(struct hl_device *hdev)
708 {
709         static const char * const name[] = {"SRAM", "CFG", "HBM"};
710         bool is_wc[3] = {false, false, true};
711         int rc;
712
713         rc = hl_pci_bars_map(hdev, name, is_wc);
714         if (rc)
715                 return rc;
716
717         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
718                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
719
720         return 0;
721 }
722
723 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
724 {
725         struct gaudi_device *gaudi = hdev->asic_specific;
726         struct hl_inbound_pci_region pci_region;
727         u64 old_addr = addr;
728         int rc;
729
730         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
731                 return old_addr;
732
733         if (hdev->asic_prop.iatu_done_by_fw)
734                 return U64_MAX;
735
736         /* Inbound Region 2 - Bar 4 - Point to HBM */
737         pci_region.mode = PCI_BAR_MATCH_MODE;
738         pci_region.bar = HBM_BAR_ID;
739         pci_region.addr = addr;
740         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
741         if (rc)
742                 return U64_MAX;
743
744         if (gaudi) {
745                 old_addr = gaudi->hbm_bar_cur_addr;
746                 gaudi->hbm_bar_cur_addr = addr;
747         }
748
749         return old_addr;
750 }
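
/*
 * Illustrative sketch, for exposition only (not part of the original driver):
 * an arbitrary HBM address can be read through BAR4 by re-pointing inbound
 * region 2 with gaudi_set_hbm_bar_base() and restoring it afterwards. The
 * helper name is hypothetical, the window alignment is simplified and error
 * handling is reduced to the bare minimum.
 */
static inline u32 example_read_hbm_word(struct hl_device *hdev, u64 addr)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        u64 bar_base_addr = addr & ~(prop->dram_pci_bar_size - 1);
        u64 old_base;
        u32 val;

        /* Point the HBM BAR at the window that contains addr */
        old_base = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
        if (old_base == U64_MAX)
                return 0;

        val = readl(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr));

        /* Restore the previous BAR mapping */
        gaudi_set_hbm_bar_base(hdev, old_base);

        return val;
}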
751
752 static int gaudi_init_iatu(struct hl_device *hdev)
753 {
754         struct hl_inbound_pci_region inbound_region;
755         struct hl_outbound_pci_region outbound_region;
756         int rc;
757
758         if (hdev->asic_prop.iatu_done_by_fw)
759                 return 0;
760
761         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
762         inbound_region.mode = PCI_BAR_MATCH_MODE;
763         inbound_region.bar = SRAM_BAR_ID;
764         inbound_region.addr = SRAM_BASE_ADDR;
765         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
766         if (rc)
767                 goto done;
768
769         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
770         inbound_region.mode = PCI_BAR_MATCH_MODE;
771         inbound_region.bar = CFG_BAR_ID;
772         inbound_region.addr = SPI_FLASH_BASE_ADDR;
773         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
774         if (rc)
775                 goto done;
776
777         /* Inbound Region 2 - Bar 4 - Point to HBM */
778         inbound_region.mode = PCI_BAR_MATCH_MODE;
779         inbound_region.bar = HBM_BAR_ID;
780         inbound_region.addr = DRAM_PHYS_BASE;
781         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
782         if (rc)
783                 goto done;
784
785         /* Outbound Region 0 - Point to Host */
786         outbound_region.addr = HOST_PHYS_BASE;
787         outbound_region.size = HOST_PHYS_SIZE;
788         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
789
790 done:
791         return rc;
792 }
793
794 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
795 {
796         return RREG32(mmHW_STATE);
797 }
798
799 static int gaudi_early_init(struct hl_device *hdev)
800 {
801         struct asic_fixed_properties *prop = &hdev->asic_prop;
802         struct pci_dev *pdev = hdev->pdev;
803         resource_size_t pci_bar_size;
804         u32 fw_boot_status;
805         int rc;
806
807         rc = gaudi_set_fixed_properties(hdev);
808         if (rc) {
809                 dev_err(hdev->dev, "Failed setting fixed properties\n");
810                 return rc;
811         }
812
813         /* Check BAR sizes */
814         pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
815
816         if (pci_bar_size != SRAM_BAR_SIZE) {
817                 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
818                         SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
819                 rc = -ENODEV;
820                 goto free_queue_props;
821         }
822
823         pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
824
825         if (pci_bar_size != CFG_BAR_SIZE) {
826                 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
827                         CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
828                 rc = -ENODEV;
829                 goto free_queue_props;
830         }
831
832         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
833         hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
834
835         /* If FW security is enabled at this point it means no access to ELBI */
836         if (hdev->asic_prop.fw_security_enabled) {
837                 hdev->asic_prop.iatu_done_by_fw = true;
838
839                 /*
840                  * The GIC-security-bit can ONLY be set by CPUCP, so at this
841                  * stage the decision can only be taken based on PCI ID security.
842                  */
843                 hdev->asic_prop.gic_interrupts_enable = false;
844                 goto pci_init;
845         }
846
847         rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
848                                 &fw_boot_status);
849         if (rc)
850                 goto free_queue_props;
851
852         /* Check whether FW is configuring iATU */
853         if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
854                         (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
855                 hdev->asic_prop.iatu_done_by_fw = true;
856
857 pci_init:
858         rc = hl_pci_init(hdev);
859         if (rc)
860                 goto free_queue_props;
861
862         /* Before continuing with the initialization, we need to read the preboot
863          * version to determine whether we are running with security-enabled firmware.
864          */
865         rc = hl_fw_read_preboot_status(hdev);
866         if (rc) {
867                 if (hdev->reset_on_preboot_fail)
868                         hdev->asic_funcs->hw_fini(hdev, true, false);
869                 goto pci_fini;
870         }
871
872         if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
873                 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
874                 hdev->asic_funcs->hw_fini(hdev, true, false);
875         }
876
877         return 0;
878
879 pci_fini:
880         hl_pci_fini(hdev);
881 free_queue_props:
882         kfree(hdev->asic_prop.hw_queues_props);
883         return rc;
884 }
885
886 static int gaudi_early_fini(struct hl_device *hdev)
887 {
888         kfree(hdev->asic_prop.hw_queues_props);
889         hl_pci_fini(hdev);
890
891         return 0;
892 }
893
894 /**
895  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
896  *
897  * @hdev: pointer to hl_device structure
898  * Return: 0 for success, negative value for error.
899  */
900 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
901 {
902         u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
903         struct asic_fixed_properties *prop = &hdev->asic_prop;
904         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
905         int rc;
906
907         if ((hdev->fw_components & FW_TYPE_LINUX) &&
908                         (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
909                 struct gaudi_device *gaudi = hdev->asic_specific;
910
911                 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
912                         return 0;
913
914                 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
915
916                 if (rc)
917                         return rc;
918
919                 freq = pll_freq_arr[2];
920         } else {
921                 /* Backward compatibility */
922                 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
923                 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
924                 nr = RREG32(mmPSOC_CPU_PLL_NR);
925                 nf = RREG32(mmPSOC_CPU_PLL_NF);
926                 od = RREG32(mmPSOC_CPU_PLL_OD);
927
928                 if (div_sel == DIV_SEL_REF_CLK ||
929                                 div_sel == DIV_SEL_DIVIDED_REF) {
930                         if (div_sel == DIV_SEL_REF_CLK)
931                                 freq = PLL_REF_CLK;
932                         else
933                                 freq = PLL_REF_CLK / (div_fctr + 1);
934                 } else if (div_sel == DIV_SEL_PLL_CLK ||
935                         div_sel == DIV_SEL_DIVIDED_PLL) {
936                         pll_clk = PLL_REF_CLK * (nf + 1) /
937                                         ((nr + 1) * (od + 1));
938                         if (div_sel == DIV_SEL_PLL_CLK)
939                                 freq = pll_clk;
940                         else
941                                 freq = pll_clk / (div_fctr + 1);
942                 } else {
943                         dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
944                         freq = 0;
945                 }
946         }
947
948         prop->psoc_timestamp_frequency = freq;
949         prop->psoc_pci_pll_nr = nr;
950         prop->psoc_pci_pll_nf = nf;
951         prop->psoc_pci_pll_od = od;
952         prop->psoc_pci_pll_div_factor = div_fctr;
953
954         return 0;
955 }
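
/*
 * Worked example for the backward-compatibility branch above, using
 * hypothetical register values for exposition only: assuming a 50 MHz
 * reference clock, nf = 35, nr = 0 and od = 0 give
 * pll_clk = 50 * 36 / (1 * 1) = 1800 MHz, and with div_sel set to
 * DIV_SEL_DIVIDED_PLL and div_fctr = 1 the reported frequency becomes
 * 1800 / 2 = 900 MHz.
 */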
956
957 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
958                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
959 {
960         struct asic_fixed_properties *prop = &hdev->asic_prop;
961         struct packet_lin_dma *init_tpc_mem_pkt;
962         struct hl_cs_job *job;
963         struct hl_cb *cb;
964         u64 dst_addr;
965         u32 cb_size, ctl;
966         u8 tpc_id;
967         int rc;
968
969         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
970         if (!cb)
971                 return -EFAULT;
972
973         init_tpc_mem_pkt = cb->kernel_address;
974         cb_size = sizeof(*init_tpc_mem_pkt);
975         memset(init_tpc_mem_pkt, 0, cb_size);
976
977         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
978
979         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
980         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
981         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
982         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
983
984         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
985
986         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
987         dst_addr = (prop->sram_user_base_address &
988                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
989                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
990         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
991
992         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
993         if (!job) {
994                 dev_err(hdev->dev, "Failed to allocate a new job\n");
995                 rc = -ENOMEM;
996                 goto release_cb;
997         }
998
999         job->id = 0;
1000         job->user_cb = cb;
1001         atomic_inc(&job->user_cb->cs_cnt);
1002         job->user_cb_size = cb_size;
1003         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1004         job->patched_cb = job->user_cb;
1005         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1006
1007         hl_debugfs_add_job(hdev, job);
1008
1009         rc = gaudi_send_job_on_qman0(hdev, job);
1010
1011         if (rc)
1012                 goto free_job;
1013
1014         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1015                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1016                 if (rc)
1017                         break;
1018         }
1019
1020 free_job:
1021         hl_userptr_delete_list(hdev, &job->userptr_list);
1022         hl_debugfs_remove_job(hdev, job);
1023         kfree(job);
1024         atomic_dec(&cb->cs_cnt);
1025
1026 release_cb:
1027         hl_cb_put(cb);
1028         hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1029
1030         return rc;
1031 }
1032
1033 /*
1034  * gaudi_init_tpc_mem() - Initialize TPC memories.
1035  * @hdev: Pointer to hl_device structure.
1036  *
1037  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1038  *
1039  * Return: 0 for success, negative value for error.
1040  */
1041 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1042 {
1043         const struct firmware *fw;
1044         size_t fw_size;
1045         void *cpu_addr;
1046         dma_addr_t dma_handle;
1047         int rc, count = 5;
1048
1049 again:
1050         rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1051         if (rc == -EINTR && count-- > 0) {
1052                 msleep(50);
1053                 goto again;
1054         }
1055
1056         if (rc) {
1057                 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1058                                 GAUDI_TPC_FW_FILE);
1059                 goto out;
1060         }
1061
1062         fw_size = fw->size;
1063         cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1064         if (!cpu_addr) {
1065                 dev_err(hdev->dev,
1066                         "Failed to allocate %zu of dma memory for TPC kernel\n",
1067                         fw_size);
1068                 rc = -ENOMEM;
1069                 goto out;
1070         }
1071
1072         memcpy(cpu_addr, fw->data, fw_size);
1073
1074         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1075
1076         hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1077
1078 out:
1079         release_firmware(fw);
1080         return rc;
1081 }
1082
1083 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1084 {
1085         struct gaudi_device *gaudi = hdev->asic_specific;
1086         struct gaudi_collective_properties *prop = &gaudi->collective_props;
1087         struct hl_hw_queue *q;
1088         u32 i, sob_id, sob_group_id, queue_id;
1089
1090         /* Iterate through SOB groups and assign a SOB for each slave queue */
1091         sob_group_id =
1092                 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1093         sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1094
1095         queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1096         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1097                 q = &hdev->kernel_queues[queue_id + (4 * i)];
1098                 q->sync_stream_prop.collective_sob_id = sob_id + i;
1099         }
1100
1101         /* Both DMA5 and TPC7 use the same resources since only a single
1102          * engine needs to participate in the reduction process.
1103          */
1104         queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1105         q = &hdev->kernel_queues[queue_id];
1106         q->sync_stream_prop.collective_sob_id =
1107                         sob_id + NIC_NUMBER_OF_ENGINES;
1108
1109         queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1110         q = &hdev->kernel_queues[queue_id];
1111         q->sync_stream_prop.collective_sob_id =
1112                         sob_id + NIC_NUMBER_OF_ENGINES;
1113 }
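
/*
 * Worked example for the mapping above, using a hypothetical HL_RSVD_SOBS of
 * 2, for exposition only: for stream 1 with curr_sob_group_idx[1] == 0,
 * sob_group_id = 1 * 2 + 0 = 2 and sob_id is that group's base SOB. NIC
 * engine i is then assigned collective SOB (sob_id + i) on queue
 * GAUDI_QUEUE_ID_NIC_0_1 + 4 * i, since each NIC engine owns four consecutive
 * queue IDs (one per stream), while DMA5 and TPC7 both use
 * (sob_id + NIC_NUMBER_OF_ENGINES).
 */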
1114
1115 static void gaudi_sob_group_hw_reset(struct kref *ref)
1116 {
1117         struct gaudi_hw_sob_group *hw_sob_group =
1118                 container_of(ref, struct gaudi_hw_sob_group, kref);
1119         struct hl_device *hdev = hw_sob_group->hdev;
1120         int i;
1121
1122         for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1123                 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1124                         (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1125
1126         kref_init(&hw_sob_group->kref);
1127 }
1128
1129 static void gaudi_sob_group_reset_error(struct kref *ref)
1130 {
1131         struct gaudi_hw_sob_group *hw_sob_group =
1132                 container_of(ref, struct gaudi_hw_sob_group, kref);
1133         struct hl_device *hdev = hw_sob_group->hdev;
1134
1135         dev_crit(hdev->dev,
1136                 "SOB release shouldn't be called here, base_sob_id: %d\n",
1137                 hw_sob_group->base_sob_id);
1138 }
1139
1140 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1141 {
1142         struct gaudi_collective_properties *prop;
1143         int i;
1144
1145         prop = &gaudi->collective_props;
1146
1147         memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1148
1149         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1150                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1151                         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1152                                         BIT(i % HL_MAX_SOBS_PER_MONITOR);
1153         /* Set collective engine bit */
1154         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1155                                 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1156 }
1157
1158 static int gaudi_collective_init(struct hl_device *hdev)
1159 {
1160         u32 i, sob_id, reserved_sobs_per_group;
1161         struct gaudi_collective_properties *prop;
1162         struct gaudi_device *gaudi;
1163
1164         gaudi = hdev->asic_specific;
1165         prop = &gaudi->collective_props;
1166         sob_id = hdev->asic_prop.collective_first_sob;
1167
1168         /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1169         reserved_sobs_per_group =
1170                 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1171
1172         /* Init SOB groups */
1173         for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1174                 prop->hw_sob_group[i].hdev = hdev;
1175                 prop->hw_sob_group[i].base_sob_id = sob_id;
1176                 sob_id += reserved_sobs_per_group;
1177                 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1178         }
1179
1180         for (i = 0 ; i < QMAN_STREAMS; i++) {
1181                 prop->next_sob_group_val[i] = 1;
1182                 prop->curr_sob_group_idx[i] = 0;
1183                 gaudi_collective_map_sobs(hdev, i);
1184         }
1185
1186         gaudi_collective_mstr_sob_mask_set(gaudi);
1187
1188         return 0;
1189 }
1190
1191 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1192 {
1193         struct gaudi_device *gaudi = hdev->asic_specific;
1194         struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1195
1196         kref_put(&cprop->hw_sob_group[sob_group].kref,
1197                                         gaudi_sob_group_hw_reset);
1198 }
1199
1200 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1201                 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1202 {
1203         u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1204         struct gaudi_collective_properties *cprop;
1205         struct hl_gen_wait_properties wait_prop;
1206         struct hl_sync_stream_properties *prop;
1207         struct gaudi_device *gaudi;
1208
1209         gaudi = hdev->asic_specific;
1210         cprop = &gaudi->collective_props;
1211         queue_id = job->hw_queue_id;
1212         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1213
1214         master_sob_base =
1215                 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1216         master_monitor = prop->collective_mstr_mon_id[0];
1217
1218         cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1219
1220         dev_dbg(hdev->dev,
1221                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1222                 master_sob_base, cprop->mstr_sob_mask[0],
1223                 cprop->next_sob_group_val[stream],
1224                 master_monitor, queue_id);
1225
1226         wait_prop.data = (void *) job->patched_cb;
1227         wait_prop.sob_base = master_sob_base;
1228         wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1229         wait_prop.sob_val = cprop->next_sob_group_val[stream];
1230         wait_prop.mon_id = master_monitor;
1231         wait_prop.q_idx = queue_id;
1232         wait_prop.size = cb_size;
1233         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1234
1235         master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1236         master_monitor = prop->collective_mstr_mon_id[1];
1237
1238         dev_dbg(hdev->dev,
1239                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1240                 master_sob_base, cprop->mstr_sob_mask[1],
1241                 cprop->next_sob_group_val[stream],
1242                 master_monitor, queue_id);
1243
1244         wait_prop.sob_base = master_sob_base;
1245         wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1246         wait_prop.mon_id = master_monitor;
1247         wait_prop.size = cb_size;
1248         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1249 }
1250
1251 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1252                 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1253 {
1254         struct hl_gen_wait_properties wait_prop;
1255         struct hl_sync_stream_properties *prop;
1256         u32 queue_id, cb_size = 0;
1257
1258         queue_id = job->hw_queue_id;
1259         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1260
1261         if (job->cs->encaps_signals) {
1262                 /* Use the encaps signal handle stored earlier in the flow
1263                  * and set the SOB information from the encaps
1264                  * signals handle.
1265                  */
1266                 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1267                                                 cs_cmpl);
1268
1269                 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1270                                 job->cs->sequence,
1271                                 cs_cmpl->hw_sob->sob_id,
1272                                 cs_cmpl->sob_val);
1273         }
1274
1275         /* Add to wait CBs using slave monitor */
1276         wait_prop.data = (void *) job->user_cb;
1277         wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1278         wait_prop.sob_mask = 0x1;
1279         wait_prop.sob_val = cs_cmpl->sob_val;
1280         wait_prop.mon_id = prop->collective_slave_mon_id;
1281         wait_prop.q_idx = queue_id;
1282         wait_prop.size = cb_size;
1283
1284         dev_dbg(hdev->dev,
1285                 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1286                 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1287                 prop->collective_slave_mon_id, queue_id);
1288
1289         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1290
1291         dev_dbg(hdev->dev,
1292                 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1293                 prop->collective_sob_id, queue_id);
1294
1295         cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1296                         prop->collective_sob_id, cb_size, false);
1297 }
1298
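/*
 * Summary of the flow below (derived from the code, kept here as a reading
 * aid): gaudi_collective_wait_init_cs() resolves the SOB and value to wait
 * on (from the signal CS completion, or from the encapsulated signals
 * handle), takes a refcount on the signal hw_sob so it cannot be reset
 * before the wait is submitted, and then initializes every job in the CS
 * either as a collective master (waits on the reserved SOB group using two
 * monitors) or as a collective slave (waits on the user SOB and signals the
 * collective SOB). Finally it advances the SOB group value for the stream
 * and handles wraparound of the reserved SOB groups.
 */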
1299 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1300 {
1301         struct hl_cs_compl *signal_cs_cmpl =
1302                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1303         struct hl_cs_compl *cs_cmpl =
1304                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1305         struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1306         struct gaudi_collective_properties *cprop;
1307         u32 stream, queue_id, sob_group_offset;
1308         struct gaudi_device *gaudi;
1309         struct hl_device *hdev;
1310         struct hl_cs_job *job;
1311         struct hl_ctx *ctx;
1312
1313         ctx = cs->ctx;
1314         hdev = ctx->hdev;
1315         gaudi = hdev->asic_specific;
1316         cprop = &gaudi->collective_props;
1317
1318         if (cs->encaps_signals) {
1319                 cs_cmpl->hw_sob = handle->hw_sob;
1320                 /* At this checkpoint we only need the hw_sob pointer
1321                  * for the completion check before we start going over the
1322                  * jobs of the master/slaves. The sob_value will be taken
1323                  * later on in gaudi_collective_slave_init_job, depending on
1324                  * each job's wait offset value.
1325                  */
1326                 cs_cmpl->sob_val = 0;
1327         } else {
1328                 /* copy the SOB id and value of the signal CS */
1329                 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1330                 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1331         }
1332
1333         /* Check again if the signal cs has already completed.
1334          * If it has, don't send any wait cs since the hw_sob
1335          * could already be in reset. If the signal is not completed,
1336          * take a refcount on the hw_sob to prevent resetting the sob
1337          * while the wait cs is not yet submitted.
1338          * Note that this check is protected by two locks,
1339          * the hw queue lock and the completion object lock,
1340          * and the same completion object lock also protects
1341          * the hw_sob reset handler function.
1342          * The hw_queue lock prevents the hw_sob refcount value,
1343          * changed by the signal/wait flows, from going out of sync.
1344          */
1345         spin_lock(&signal_cs_cmpl->lock);
1346
1347         if (completion_done(&cs->signal_fence->completion)) {
1348                 spin_unlock(&signal_cs_cmpl->lock);
1349                 return -EINVAL;
1350         }
1351         /* Increment kref since all slave queues are now waiting on it */
1352         kref_get(&cs_cmpl->hw_sob->kref);
1353
1354         spin_unlock(&signal_cs_cmpl->lock);
1355
1356         /* Calculate the stream from collective master queue (1st job) */
1357         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1358         stream = job->hw_queue_id % 4;
1359         sob_group_offset =
1360                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1361
1362         list_for_each_entry(job, &cs->job_list, cs_node) {
1363                 queue_id = job->hw_queue_id;
1364
1365                 if (hdev->kernel_queues[queue_id].collective_mode ==
1366                                 HL_COLLECTIVE_MASTER)
1367                         gaudi_collective_master_init_job(hdev, job, stream,
1368                                                 sob_group_offset);
1369                 else
1370                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1371         }
1372
1373         cs_cmpl->sob_group = sob_group_offset;
1374
1375         /* Handle sob group kref and wraparound */
1376         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1377         cprop->next_sob_group_val[stream]++;
1378
1379         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1380                 /*
1381                  * Decrement as we reached the max value.
1382                  * The release function won't be called here as we've
1383                  * just incremented the refcount.
1384                  */
1385                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1386                                 gaudi_sob_group_reset_error);
1387                 cprop->next_sob_group_val[stream] = 1;
1388                 /* only two SOBs are currently in use */
1389                 cprop->curr_sob_group_idx[stream] =
1390                         (cprop->curr_sob_group_idx[stream] + 1) &
1391                                                         (HL_RSVD_SOBS - 1);
1392
1393                 gaudi_collective_map_sobs(hdev, stream);
1394
1395                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1396                                 cprop->curr_sob_group_idx[stream], stream);
1397         }
1398
1399         mb();
1400         hl_fence_put(cs->signal_fence);
1401         cs->signal_fence = NULL;
1402
1403         return 0;
1404 }
1405
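/*
 * Illustrative example for the helper below (the concrete numbers are
 * assumptions, not taken from the hardware headers): with a 128-byte device
 * cache line and a 100-byte user CB, round_up() yields 128; if the two
 * msg_prot packets do not fit in the remaining 28 bytes, the returned extra
 * size is the 28 bytes of padding plus the two msg_prot packets, otherwise
 * only the two msg_prot packets are returned.
 */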
1406 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1407 {
1408         u32 cacheline_end, additional_commands;
1409
1410         cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1411         additional_commands = sizeof(struct packet_msg_prot) * 2;
1412
1413         if (user_cb_size + additional_commands > cacheline_end)
1414                 return cacheline_end - user_cb_size + additional_commands;
1415         else
1416                 return additional_commands;
1417 }
1418
1419 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1420                 struct hl_ctx *ctx, struct hl_cs *cs,
1421                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1422                 u32 encaps_signal_offset)
1423 {
1424         struct hw_queue_properties *hw_queue_prop;
1425         struct hl_cs_counters_atomic *cntr;
1426         struct hl_cs_job *job;
1427         struct hl_cb *cb;
1428         u32 cb_size;
1429         bool patched_cb;
1430
1431         cntr = &hdev->aggregated_cs_counters;
1432
1433         if (mode == HL_COLLECTIVE_MASTER) {
1434                 /* The CB of the collective master queue contains:
1435                  * 4 msg short packets for monitor 1 configuration
1436                  * 1 fence packet
1437                  * 4 msg short packets for monitor 2 configuration
1438                  * 1 fence packet
1439                  * 2 msg prot packets for completion and MSI
1440                  */
1441                 cb_size = sizeof(struct packet_msg_short) * 8 +
1442                                 sizeof(struct packet_fence) * 2 +
1443                                 sizeof(struct packet_msg_prot) * 2;
1444                 patched_cb = true;
1445         } else {
1446                 /* The CB of the collective slave queues contains:
1447                  * 4 msg short packets for monitor configuration
1448                  * 1 fence packet
1449                  * 1 additional msg short packet for sob signal
1450                  */
1451                 cb_size = sizeof(struct packet_msg_short) * 5 +
1452                                 sizeof(struct packet_fence);
1453                 patched_cb = false;
1454         }
1455
1456         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1457         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1458         if (!job) {
1459                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1460                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1461                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1462                 return -ENOMEM;
1463         }
1464
1465         /* Allocate an internal mapped CB for non-patched CBs */
1466         cb = hl_cb_kernel_create(hdev, cb_size,
1467                         hdev->mmu_enable && !patched_cb);
1468         if (!cb) {
1469                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1470                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1471                 kfree(job);
1472                 return -EFAULT;
1473         }
1474
1475         job->id = 0;
1476         job->cs = cs;
1477         job->user_cb = cb;
1478         atomic_inc(&job->user_cb->cs_cnt);
1479         job->user_cb_size = cb_size;
1480         job->hw_queue_id = queue_id;
1481
1482         /* Since the collective wait cs is guaranteed to have only one
1483          * chunk, we can use this chunk to set the encapsulated signal
1484          * offset in the jobs.
1485          */
1486         if (cs->encaps_signals)
1487                 job->encaps_sig_wait_offset = encaps_signal_offset;
1488
1489         /*
1490          * No need for parsing, the user CB is the patched CB.
1491          * We call hl_cb_destroy() for two reasons - we no longer need
1492          * the CB in the CB idr, and we need to decrement its refcount as
1493          * it was incremented inside hl_cb_kernel_create().
1494          */
1495         if (patched_cb)
1496                 job->patched_cb = job->user_cb;
1497         else
1498                 job->patched_cb = NULL;
1499
1500         job->job_cb_size = job->user_cb_size;
1501         hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1502
1503         /* increment refcount since we get a completion for external queues */
1504         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1505                 cs_get(cs);
1506
1507         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1508
1509         list_add_tail(&job->cs_node, &cs->job_list);
1510
1511         hl_debugfs_add_job(hdev, job);
1512
1513         return 0;
1514 }
1515
1516 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1517                 struct hl_ctx *ctx, struct hl_cs *cs,
1518                 u32 wait_queue_id, u32 collective_engine_id,
1519                 u32 encaps_signal_offset)
1520 {
1521         struct gaudi_device *gaudi = hdev->asic_specific;
1522         struct hw_queue_properties *hw_queue_prop;
1523         u32 queue_id, collective_queue, num_jobs;
1524         u32 stream, nic_queue, nic_idx = 0;
1525         bool skip;
1526         int i, rc = 0;
1527
1528         /* Verify wait queue id is configured as master */
1529         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1530         if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
1531                 dev_err(hdev->dev,
1532                         "Queue %d is not configured as collective master\n",
1533                         wait_queue_id);
1534                 return -EINVAL;
1535         }
1536
1537         /* Verify engine id is supported */
1538         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1539                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1540                 dev_err(hdev->dev,
1541                         "Collective wait does not support engine %u\n",
1542                         collective_engine_id);
1543                 return -EINVAL;
1544         }
1545
1546         stream = wait_queue_id % 4;
1547
1548         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1549                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1550         else
1551                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1552
1553         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1554         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1555
1556         /* The first job goes to the collective master queue, where it will
1557          * wait for the collective slave queues to finish execution.
1558          * The synchronization is done using two monitors:
1559          * the first monitor for NICs 0-7, the second monitor for NICs 8-9
1560          * and the reduction engine (DMA5/TPC7).
1561          *
1562          * The rest of the jobs go to the collective slave queues, which will
1563          * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1564          */
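        /* Queue indexing note: every engine exposes four queues (streams 0-3),
         * so the stream is wait_queue_id % 4 and stepping nic_queue by 4 moves
         * to the next NIC engine while staying on the same stream.
         */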
1565         for (i = 0 ; i < num_jobs ; i++) {
1566                 if (i == 0) {
1567                         queue_id = wait_queue_id;
1568                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1569                                 HL_COLLECTIVE_MASTER, queue_id,
1570                                 wait_queue_id, encaps_signal_offset);
1571                 } else {
1572                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1573                                 if (gaudi->hw_cap_initialized &
1574                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1575                                         skip = false;
1576                                 else
1577                                         skip = true;
1578
1579                                 queue_id = nic_queue;
1580                                 nic_queue += 4;
1581                                 nic_idx++;
1582
1583                                 if (skip)
1584                                         continue;
1585                         } else {
1586                                 queue_id = collective_queue;
1587                         }
1588
1589                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1590                                 HL_COLLECTIVE_SLAVE, queue_id,
1591                                 wait_queue_id, encaps_signal_offset);
1592                 }
1593
1594                 if (rc)
1595                         return rc;
1596         }
1597
1598         return rc;
1599 }
1600
1601 static int gaudi_late_init(struct hl_device *hdev)
1602 {
1603         struct gaudi_device *gaudi = hdev->asic_specific;
1604         int rc;
1605
1606         rc = gaudi->cpucp_info_get(hdev);
1607         if (rc) {
1608                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1609                 return rc;
1610         }
1611
1612         if ((hdev->card_type == cpucp_card_type_pci) &&
1613                         (hdev->nic_ports_mask & 0x3)) {
1614                 dev_info(hdev->dev,
1615                         "PCI card detected, only 8 ports are enabled\n");
1616                 hdev->nic_ports_mask &= ~0x3;
1617
1618                 /* Stop and disable unused NIC QMANs */
1619                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1620                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1621                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1622
1623                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1624                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1625                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1626
1627                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1628                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1629
1630                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1631         }
1632
1633         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1634         if (rc) {
1635                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1636                 return rc;
1637         }
1638
1639         /* Scrub both SRAM and DRAM */
1640         rc = hdev->asic_funcs->scrub_device_mem(hdev);
1641         if (rc)
1642                 goto disable_pci_access;
1643
1644         rc = gaudi_fetch_psoc_frequency(hdev);
1645         if (rc) {
1646                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1647                 goto disable_pci_access;
1648         }
1649
1650         rc = gaudi_mmu_clear_pgt_range(hdev);
1651         if (rc) {
1652                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1653                 goto disable_pci_access;
1654         }
1655
1656         rc = gaudi_init_tpc_mem(hdev);
1657         if (rc) {
1658                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1659                 goto disable_pci_access;
1660         }
1661
1662         rc = gaudi_collective_init(hdev);
1663         if (rc) {
1664                 dev_err(hdev->dev, "Failed to init collective\n");
1665                 goto disable_pci_access;
1666         }
1667
1668         /* We only support a single ASID for the user, so for the sake of optimization, just
1669          * initialize the ASID one time during device initialization with the fixed value of 1
1670          */
1671         gaudi_mmu_prepare(hdev, 1);
1672
1673         hl_fw_set_pll_profile(hdev);
1674
1675         return 0;
1676
1677 disable_pci_access:
1678         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1679
1680         return rc;
1681 }
1682
1683 static void gaudi_late_fini(struct hl_device *hdev)
1684 {
1685         hl_hwmon_release_resources(hdev);
1686 }
1687
1688 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1689 {
1690         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1691         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1692         int i, j, rc = 0;
1693
1694         /*
1695          * The device CPU works with 40-bit addresses, where bit 39 must be set
1696          * to '1' when accessing the host.
1697          * Bits 49:39 of the full host address are saved for a later
1698          * configuration of the HW to perform the extension to 50 bits.
1699          * Because there is a single HW register that holds the extension bits,
1700          * these bits must be identical across the entire allocated range.
1701          */
1702
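        /* Sketch of the check below: GAUDI_CPU_PCI_MSB_ADDR() extracts the
         * extension bits (49:39 per the comment above) of the first and last
         * byte of each allocation. The loop retries up to
         * GAUDI_ALLOC_CPU_MEM_RETRY_CNT times until it finds an allocation
         * that does not cross a bit-39 boundary; unsuitable attempts are
         * freed at the free_dma_mem_arr label.
         */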
1703         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1704                 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1705                                                                 &dma_addr_arr[i],
1706                                                                 GFP_KERNEL | __GFP_ZERO);
1707                 if (!virt_addr_arr[i]) {
1708                         rc = -ENOMEM;
1709                         goto free_dma_mem_arr;
1710                 }
1711
1712                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1713                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1714                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1715                         break;
1716         }
1717
1718         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1719                 dev_err(hdev->dev,
1720                         "MSBs of the CPU accessible DMA memory are not identical across the allocated range\n");
1721                 rc = -EFAULT;
1722                 goto free_dma_mem_arr;
1723         }
1724
1725         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1726         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1727         hdev->cpu_pci_msb_addr =
1728                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1729
1730         if (!hdev->asic_prop.fw_security_enabled)
1731                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1732
1733 free_dma_mem_arr:
1734         for (j = 0 ; j < i ; j++)
1735                 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1736                                                 dma_addr_arr[j]);
1737
1738         return rc;
1739 }
1740
1741 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1742 {
1743         struct gaudi_device *gaudi = hdev->asic_specific;
1744         struct gaudi_internal_qman_info *q;
1745         u32 i;
1746
1747         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1748                 q = &gaudi->internal_qmans[i];
1749                 if (!q->pq_kernel_addr)
1750                         continue;
1751                 hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1752         }
1753 }
1754
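/*
 * The allocator below walks all queue IDs, skips queues that are not of the
 * internal type, and sizes each PQ according to its engine class (HBM DMA,
 * MME, TPC or NIC). On any failure, everything allocated so far is released
 * by gaudi_free_internal_qmans_pq_mem().
 */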
1755 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1756 {
1757         struct gaudi_device *gaudi = hdev->asic_specific;
1758         struct gaudi_internal_qman_info *q;
1759         int rc, i;
1760
1761         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1762                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1763                         continue;
1764
1765                 q = &gaudi->internal_qmans[i];
1766
1767                 switch (i) {
1768                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1769                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1770                         break;
1771                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1772                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1773                         break;
1774                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1775                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1776                         break;
1777                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1778                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1779                         break;
1780                 default:
1781                         dev_err(hdev->dev, "Bad internal queue index %d", i);
1782                         rc = -EINVAL;
1783                         goto free_internal_qmans_pq_mem;
1784                 }
1785
1786                 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1787                                                                 GFP_KERNEL | __GFP_ZERO);
1788                 if (!q->pq_kernel_addr) {
1789                         rc = -ENOMEM;
1790                         goto free_internal_qmans_pq_mem;
1791                 }
1792         }
1793
1794         return 0;
1795
1796 free_internal_qmans_pq_mem:
1797         gaudi_free_internal_qmans_pq_mem(hdev);
1798         return rc;
1799 }
1800
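/*
 * The function below fills hdev->pci_mem_region[] with the regions reachable
 * through the PCI BARs: CFG and SP SRAM share the CFG BAR (their offsets are
 * relative to SPI_FLASH_BASE_ADDR), SRAM has its own BAR, and DRAM is exposed
 * through the HBM BAR with the size taken from the ASIC properties.
 */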
1801 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1802 {
1803         struct asic_fixed_properties *prop = &hdev->asic_prop;
1804         struct pci_mem_region *region;
1805
1806         /* CFG */
1807         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1808         region->region_base = CFG_BASE;
1809         region->region_size = CFG_SIZE;
1810         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1811         region->bar_size = CFG_BAR_SIZE;
1812         region->bar_id = CFG_BAR_ID;
1813         region->used = 1;
1814
1815         /* SRAM */
1816         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1817         region->region_base = SRAM_BASE_ADDR;
1818         region->region_size = SRAM_SIZE;
1819         region->offset_in_bar = 0;
1820         region->bar_size = SRAM_BAR_SIZE;
1821         region->bar_id = SRAM_BAR_ID;
1822         region->used = 1;
1823
1824         /* DRAM */
1825         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1826         region->region_base = DRAM_PHYS_BASE;
1827         region->region_size = hdev->asic_prop.dram_size;
1828         region->offset_in_bar = 0;
1829         region->bar_size = prop->dram_pci_bar_size;
1830         region->bar_id = HBM_BAR_ID;
1831         region->used = 1;
1832
1833         /* SP SRAM */
1834         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1835         region->region_base = PSOC_SCRATCHPAD_ADDR;
1836         region->region_size = PSOC_SCRATCHPAD_SIZE;
1837         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1838         region->bar_size = CFG_BAR_SIZE;
1839         region->bar_id = CFG_BAR_ID;
1840         region->used = 1;
1841 }
1842
1843 static int gaudi_sw_init(struct hl_device *hdev)
1844 {
1845         struct gaudi_device *gaudi;
1846         u32 i, event_id = 0;
1847         int rc;
1848
1849         /* Allocate device structure */
1850         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1851         if (!gaudi)
1852                 return -ENOMEM;
1853
1854         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1855                 if (gaudi_irq_map_table[i].valid) {
1856                         if (event_id == GAUDI_EVENT_SIZE) {
1857                                 dev_err(hdev->dev,
1858                                         "Event array exceeds the limit of %u events\n",
1859                                         GAUDI_EVENT_SIZE);
1860                                 rc = -EINVAL;
1861                                 goto free_gaudi_device;
1862                         }
1863
1864                         gaudi->events[event_id++] =
1865                                         gaudi_irq_map_table[i].fc_id;
1866                 }
1867         }
1868
1869         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1870
1871         hdev->asic_specific = gaudi;
1872
1873         /* Create DMA pool for small allocations */
1874         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1875                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1876         if (!hdev->dma_pool) {
1877                 dev_err(hdev->dev, "failed to create DMA pool\n");
1878                 rc = -ENOMEM;
1879                 goto free_gaudi_device;
1880         }
1881
1882         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1883         if (rc)
1884                 goto free_dma_pool;
1885
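        /* Note: ilog2(32) sets the pool's minimum allocation granularity to
         * 32 bytes, and -1 means the pool is not tied to a specific NUMA node.
         */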
1886         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1887         if (!hdev->cpu_accessible_dma_pool) {
1888                 dev_err(hdev->dev,
1889                         "Failed to create CPU accessible DMA pool\n");
1890                 rc = -ENOMEM;
1891                 goto free_cpu_dma_mem;
1892         }
1893
1894         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1895                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1896                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1897         if (rc) {
1898                 dev_err(hdev->dev,
1899                         "Failed to add memory to CPU accessible DMA pool\n");
1900                 rc = -EFAULT;
1901                 goto free_cpu_accessible_dma_pool;
1902         }
1903
1904         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1905         if (rc)
1906                 goto free_cpu_accessible_dma_pool;
1907
1908         spin_lock_init(&gaudi->hw_queues_lock);
1909
1910         hdev->supports_sync_stream = true;
1911         hdev->supports_coresight = true;
1912         hdev->supports_staged_submission = true;
1913         hdev->supports_wait_for_multi_cs = true;
1914
1915         hdev->asic_funcs->set_pci_memory_regions(hdev);
1916         hdev->stream_master_qid_arr =
1917                                 hdev->asic_funcs->get_stream_master_qid_arr();
1918         hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1919
1920         return 0;
1921
1922 free_cpu_accessible_dma_pool:
1923         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1924 free_cpu_dma_mem:
1925         if (!hdev->asic_prop.fw_security_enabled)
1926                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1927                                         hdev->cpu_pci_msb_addr);
1928         hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1929                                         hdev->cpu_accessible_dma_address);
1930 free_dma_pool:
1931         dma_pool_destroy(hdev->dma_pool);
1932 free_gaudi_device:
1933         kfree(gaudi);
1934         return rc;
1935 }
1936
1937 static int gaudi_sw_fini(struct hl_device *hdev)
1938 {
1939         struct gaudi_device *gaudi = hdev->asic_specific;
1940
1941         gaudi_free_internal_qmans_pq_mem(hdev);
1942
1943         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1944
1945         if (!hdev->asic_prop.fw_security_enabled)
1946                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1947                                         hdev->cpu_pci_msb_addr);
1948
1949         hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1950                                         hdev->cpu_accessible_dma_address);
1951
1952         dma_pool_destroy(hdev->dma_pool);
1953
1954         kfree(gaudi);
1955
1956         return 0;
1957 }
1958
1959 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1960 {
1961         struct hl_device *hdev = arg;
1962         int i;
1963
1964         if (hdev->disabled)
1965                 return IRQ_HANDLED;
1966
1967         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1968                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1969
1970         hl_irq_handler_eq(irq, &hdev->event_queue);
1971
1972         return IRQ_HANDLED;
1973 }
1974
1975 /*
1976  * For backward compatibility, new MSI interrupts should be set after the
1977  * existing CPU and NIC interrupts.
1978  */
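/*
 * Vector mapping used below: interrupt numbers below GAUDI_EVENT_QUEUE_MSI_IDX
 * (and the CPU event queue itself) map 1:1 to MSI vectors, while any newer
 * interrupt is placed after the NIC vectors, i.e. at
 * (nr + NIC_NUMBER_OF_ENGINES + 1).
 */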
1979 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1980                                 bool cpu_eq)
1981 {
1982         int msi_vec;
1983
1984         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1985                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1986                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1987
1988         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1989                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1990
1991         return pci_irq_vector(hdev->pdev, msi_vec);
1992 }
1993
1994 static int gaudi_enable_msi_single(struct hl_device *hdev)
1995 {
1996         int rc, irq;
1997
1998         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1999
2000         irq = gaudi_pci_irq_vector(hdev, 0, false);
2001         rc = request_irq(irq, gaudi_irq_handler_single, 0,
2002                         "gaudi single msi", hdev);
2003         if (rc)
2004                 dev_err(hdev->dev,
2005                         "Failed to request single MSI IRQ\n");
2006
2007         return rc;
2008 }
2009
2010 static int gaudi_enable_msi_multi(struct hl_device *hdev)
2011 {
2012         int cq_cnt = hdev->asic_prop.completion_queues_count;
2013         int rc, i, irq_cnt_init, irq;
2014
2015         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2016                 irq = gaudi_pci_irq_vector(hdev, i, false);
2017                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
2018                                 &hdev->completion_queue[i]);
2019                 if (rc) {
2020                         dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2021                         goto free_irqs;
2022                 }
2023         }
2024
2025         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
2026         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
2027                                 &hdev->event_queue);
2028         if (rc) {
2029                 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
2030                 goto free_irqs;
2031         }
2032
2033         return 0;
2034
2035 free_irqs:
2036         for (i = 0 ; i < irq_cnt_init ; i++)
2037                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
2038                                 &hdev->completion_queue[i]);
2039         return rc;
2040 }
2041
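/*
 * MSI mode selection below: if pci_alloc_irq_vectors() grants fewer than
 * NUMBER_OF_INTERRUPTS vectors, the driver falls back to a single shared MSI
 * handled by gaudi_irq_handler_single(); otherwise every completion queue and
 * the event queue get a dedicated vector via gaudi_enable_msi_multi().
 */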
2042 static int gaudi_enable_msi(struct hl_device *hdev)
2043 {
2044         struct gaudi_device *gaudi = hdev->asic_specific;
2045         int rc;
2046
2047         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2048                 return 0;
2049
2050         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2051         if (rc < 0) {
2052                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2053                 return rc;
2054         }
2055
2056         if (rc < NUMBER_OF_INTERRUPTS) {
2057                 gaudi->multi_msi_mode = false;
2058                 rc = gaudi_enable_msi_single(hdev);
2059         } else {
2060                 gaudi->multi_msi_mode = true;
2061                 rc = gaudi_enable_msi_multi(hdev);
2062         }
2063
2064         if (rc)
2065                 goto free_pci_irq_vectors;
2066
2067         gaudi->hw_cap_initialized |= HW_CAP_MSI;
2068
2069         return 0;
2070
2071 free_pci_irq_vectors:
2072         pci_free_irq_vectors(hdev->pdev);
2073         return rc;
2074 }
2075
2076 static void gaudi_sync_irqs(struct hl_device *hdev)
2077 {
2078         struct gaudi_device *gaudi = hdev->asic_specific;
2079         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2080
2081         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2082                 return;
2083
2084         /* Wait for all pending IRQs to finish being handled */
2085         if (gaudi->multi_msi_mode) {
2086                 for (i = 0 ; i < cq_cnt ; i++)
2087                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2088
2089                 synchronize_irq(gaudi_pci_irq_vector(hdev,
2090                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
2091                                                 true));
2092         } else {
2093                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2094         }
2095 }
2096
2097 static void gaudi_disable_msi(struct hl_device *hdev)
2098 {
2099         struct gaudi_device *gaudi = hdev->asic_specific;
2100         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2101
2102         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2103                 return;
2104
2105         gaudi_sync_irqs(hdev);
2106
2107         if (gaudi->multi_msi_mode) {
2108                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2109                                                 true);
2110                 free_irq(irq, &hdev->event_queue);
2111
2112                 for (i = 0 ; i < cq_cnt ; i++) {
2113                         irq = gaudi_pci_irq_vector(hdev, i, false);
2114                         free_irq(irq, &hdev->completion_queue[i]);
2115                 }
2116         } else {
2117                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2118         }
2119
2120         pci_free_irq_vectors(hdev->pdev);
2121
2122         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2123 }
2124
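/*
 * The scrambler, E2E credit and HBM credit init functions below write their
 * registers only when firmware security is disabled and the firmware has not
 * already configured them (the corresponding CPU_BOOT_DEV_STS0 bits are
 * clear); otherwise the configuration is left to the firmware.
 */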
2125 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2126 {
2127         struct gaudi_device *gaudi = hdev->asic_specific;
2128
2129         if (hdev->asic_prop.fw_security_enabled)
2130                 return;
2131
2132         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2133                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2134                 return;
2135
2136         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2137                 return;
2138
2139         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2140                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2141         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2142                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2143         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2144                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2145         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2146                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2147         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2148                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2149         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2150                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2151         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2152                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2153         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2154                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2155
2156         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2157                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2158         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2159                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2160         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2161                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2162         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2163                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2164         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2165                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2166         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2167                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2168         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2169                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2170         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2171                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2172
2173         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2174                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2175         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2176                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2177         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2178                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2179         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2180                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2181         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2182                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2183         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2184                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2185         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2186                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2187         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2188                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2189
2190         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2191 }
2192
2193 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2194 {
2195         struct gaudi_device *gaudi = hdev->asic_specific;
2196
2197         if (hdev->asic_prop.fw_security_enabled)
2198                 return;
2199
2200         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2201                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2202                 return;
2203
2204         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2205                 return;
2206
2207         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2208                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2209         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2210                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2211         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2212                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2213         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2214                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2215         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2216                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2217         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2218                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2219         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2220                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2221         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2222                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2223
2224         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2225                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2226         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2227                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2228         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2229                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2230         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2231                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2232         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2233                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2234         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2235                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2236         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2237                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2238         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2239                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2240
2241         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2242                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2243         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2244                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2245         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2246                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2247         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2248                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2249         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2250                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2251         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2252                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2253         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2254                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2255         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2256                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2257
2258         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2259 }
2260
2261 static void gaudi_init_e2e(struct hl_device *hdev)
2262 {
2263         if (hdev->asic_prop.fw_security_enabled)
2264                 return;
2265
2266         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2267                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2268                 return;
2269
2270         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2271         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2272         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2273         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2274
2275         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2276         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2277         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2278         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2279
2280         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2281         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2282         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2283         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2284
2285         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2286         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2287         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2288         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2289
2290         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2291         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2292         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2293         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2294
2295         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2296         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2297         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2298         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2299
2300         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2301         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2302         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2303         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2304
2305         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2306         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2307         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2308         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2309
2310         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2311         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2312         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2313         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2314
2315         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2316         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2317         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2318         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2319
2320         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2321         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2322         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2323         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2324
2325         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2326         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2327         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2328         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2329
2330         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2331         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2332         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2333         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2334
2335         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2336         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2337         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2338         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2339
2340         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2341         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2342         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2343         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2344
2345         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2346         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2347         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2348         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2349
2350         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2351         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2352         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2353         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2354
2355         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2356         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2357         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2358         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2359
2360         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2361         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2362         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2363         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2364
2365         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2366         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2367         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2368         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2369
2370         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2371         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2372         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2373         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2374
2375         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2376         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2377         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2378         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2379
2380         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2381         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2382         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2383         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2384
2385         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2386         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2387         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2388         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2389
2390         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2391                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2392         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2393                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2394
2395         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2396                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2397         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2398                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2399
2400         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2401                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2402         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2403                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2404
2405         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2406                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2407         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2408                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2409
2410         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2411                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2412         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2413                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2414
2415         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2416                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2417         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2418                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2419
2420         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2421                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2422         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2423                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2424
2425         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2426                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2427         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2428                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2429
2430         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2431                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2432         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2433                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2434
2435         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2436                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2437         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2438                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2439
2440         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2441                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2442         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2443                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2444
2445         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2446                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2447         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2448                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2449
2450         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2451                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2452         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2453                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2454
2455         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2456                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2457         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2458                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2459
2460         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2461                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2462         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2463                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2464
2465         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2466                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2467         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2468                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2469
2470         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2471                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2472         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2473                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2474
2475         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2476                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2477         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2478                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2479
2480         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2481                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2482         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2483                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2484
2485         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2486                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2487         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2488                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2489
2490         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2491                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2492         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2493                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2494
2495         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2496                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2497         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2498                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2499
2500         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2501                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2502         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2503                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2504
2505         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2506                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2507         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2508                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2509 }
2510
2511 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2512 {
2513         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2514
2515         if (hdev->asic_prop.fw_security_enabled)
2516                 return;
2517
2518         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2519                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2520                 return;
2521
2522         hbm0_wr = 0x33333333;
2523         hbm0_rd = 0x77777777;
2524         hbm1_wr = 0x55555555;
2525         hbm1_rd = 0xDDDDDDDD;
2526
2527         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2528         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2529         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2530         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2531
2532         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2533         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2534         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2535         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2536
2537         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2538         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2539         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2540         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2541
2542         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2543         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2544         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2545         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2546
2547         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2548                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2549                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2550         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2551                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2552                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2553         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2554                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2555                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2556         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2557                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2558                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2559
2560         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2561                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2562                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2563         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2564                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2565                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2566         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2567                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2568                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2569         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2570                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2571                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2572 }
2573
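/*
 * One-time "golden" register setup: E2E and HBM credits, masking of TPC
 * arithmetic interrupts, TPC i-cache fetch line configuration, clearing of
 * the first SRAM bytes used by tensor DMA, and the MME EUs rollup count
 * add value.
 */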
2574 static void gaudi_init_golden_registers(struct hl_device *hdev)
2575 {
2576         u32 tpc_offset;
2577         int tpc_id, i;
2578
2579         gaudi_init_e2e(hdev);
2580         gaudi_init_hbm_cred(hdev);
2581
2582         for (tpc_id = 0, tpc_offset = 0;
2583                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2584                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2585                 /* Mask all arithmetic interrupts from TPC */
2586                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2587                 /* Set 16 cache lines */
2588                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2589                                 ICACHE_FETCH_LINE_NUM, 2);
2590         }
2591
2592         /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2593         for (i = 0 ; i < 128 ; i += 8)
2594                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2595
2596         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2597         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2598         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2599         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2600 }
2601
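/*
 * Configure a single stream of a PCI DMA QMAN: point the PQ at the host
 * queue buffer, set the PQ size/PI/CI, program the LDMA offsets and the
 * sync manager monitor/SOB message base addresses. The per-QMAN error
 * reporting, arbiter watchdog and protection settings are written only for
 * stream 0, as they are shared by all four streams.
 */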
2602 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2603                                         int qman_id, dma_addr_t qman_pq_addr)
2604 {
2605         struct cpu_dyn_regs *dyn_regs =
2606                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2607         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2608         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2609         u32 q_off, dma_qm_offset;
2610         u32 dma_qm_err_cfg, irq_handler_offset;
2611
2612         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2613
2614         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2615                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2616         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2617                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2618         so_base_en_lo = lower_32_bits(CFG_BASE +
2619                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2620         so_base_en_hi = upper_32_bits(CFG_BASE +
2621                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2622         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2623                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2624         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2625                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2626         so_base_ws_lo = lower_32_bits(CFG_BASE +
2627                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2628         so_base_ws_hi = upper_32_bits(CFG_BASE +
2629                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2630
2631         q_off = dma_qm_offset + qman_id * 4;
2632
2633         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2634         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2635
2636         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2637         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2638         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2639
2640         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2641         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2642                                                         QMAN_LDMA_SRC_OFFSET);
2643         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2644                                                         QMAN_LDMA_DST_OFFSET);
2645
2646         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2647         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2648         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2649         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2650         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2651         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2652         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2653         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2654
2655         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2656
2657         /* The following configuration is needed only once per QMAN */
2658         if (qman_id == 0) {
2659                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2660                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2661                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2662
2663                 /* Configure RAZWI IRQ */
2664                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2665                 if (hdev->stop_on_err)
2666                         dma_qm_err_cfg |=
2667                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2668
2669                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2670
2671                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2672                         lower_32_bits(CFG_BASE + irq_handler_offset));
2673                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2674                         upper_32_bits(CFG_BASE + irq_handler_offset));
2675
2676                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2677                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2678                                                                         dma_id);
2679
2680                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2681                                 QM_ARB_ERR_MSG_EN_MASK);
2682
2683                 /* Set timeout to maximum */
2684                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2685
2686                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2687                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2688
2689                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2690         }
2691 }
2692
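/*
 * Configure the DMA core engine itself (as opposed to its QMAN): maximum
 * outstanding reads, error message reporting towards the GIC/FW interrupt
 * handler, MMU bypass for the secured channel, and finally enable the core.
 */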
2693 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2694 {
2695         struct cpu_dyn_regs *dyn_regs =
2696                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2697         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2698         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2699         u32 irq_handler_offset;
2700
2701         /* Set to maximum possible according to physical size */
2702         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2703         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2704
2705         /* WA for H/W bug H3-2116 */
2706         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2707
2708         /* STOP_ON bit means no completion is sent for the operation on RAZWI */
2709         if (hdev->stop_on_err)
2710                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2711
2712         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2713
2714         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2715                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2716                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2717
2718         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2719                 lower_32_bits(CFG_BASE + irq_handler_offset));
2720         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2721                 upper_32_bits(CFG_BASE + irq_handler_offset));
2722
2723         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2724                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2725         WREG32(mmDMA0_CORE_PROT + dma_offset,
2726                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2727         /* If the channel is secured, it should be in MMU bypass mode */
2728         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2729                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2730         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2731 }
2732
2733 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2734                                 u32 enable_mask)
2735 {
2736         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2737
2738         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2739 }
2740
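/*
 * Initialize all PCI DMA channels: for each channel, set up the four QMAN
 * streams with their host PQ addresses, CQ ids and MSI vectors, then
 * initialize the DMA core and enable the QMAN.
 */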
2741 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2742 {
2743         struct gaudi_device *gaudi = hdev->asic_specific;
2744         struct hl_hw_queue *q;
2745         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2746
2747         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2748                 return;
2749
2750         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2751                 dma_id = gaudi_dma_assignment[i];
2752                 /*
2753                  * For queues after the CPU queue we need to add 1 to get the
2754                  * correct queue index. In addition, we need to add the CPU EQ
2755                  * and the NIC IRQs in order to get the correct MSI register.
2756                  */
2757                 if (dma_id > 1) {
2758                         cpu_skip = 1;
2759                         nic_skip = NIC_NUMBER_OF_ENGINES;
2760                 } else {
2761                         cpu_skip = 0;
2762                         nic_skip = 0;
2763                 }
2764
2765                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2766                         q_idx = 4 * dma_id + j + cpu_skip;
2767                         q = &hdev->kernel_queues[q_idx];
2768                         q->cq_id = cq_id++;
2769                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2770                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2771                                                 q->bus_address);
2772                 }
2773
2774                 gaudi_init_dma_core(hdev, dma_id);
2775
2776                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2777         }
2778
2779         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2780 }
2781
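/*
 * Configure a single stream of an HBM DMA QMAN. Streams 0-3 are upper CPs
 * driven by their own internal PQ; stream 4 is the lower CP, for which the
 * per-QMAN error reporting, arbiter watchdog and protection settings are
 * programmed instead of a PQ.
 */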
2782 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2783                                         int qman_id, u64 qman_base_addr)
2784 {
2785         struct cpu_dyn_regs *dyn_regs =
2786                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2787         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2788         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2789         u32 dma_qm_err_cfg, irq_handler_offset;
2790         u32 q_off, dma_qm_offset;
2791
2792         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2793
2794         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2795                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2796         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2797                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2798         so_base_en_lo = lower_32_bits(CFG_BASE +
2799                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2800         so_base_en_hi = upper_32_bits(CFG_BASE +
2801                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2802         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2803                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2804         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2805                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2806         so_base_ws_lo = lower_32_bits(CFG_BASE +
2807                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2808         so_base_ws_hi = upper_32_bits(CFG_BASE +
2809                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2810
2811         q_off = dma_qm_offset + qman_id * 4;
2812
2813         if (qman_id < 4) {
2814                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2815                                         lower_32_bits(qman_base_addr));
2816                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2817                                         upper_32_bits(qman_base_addr));
2818
2819                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2820                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2821                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2822
2823                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2824                                                         QMAN_CPDMA_SIZE_OFFSET);
2825                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2826                                                         QMAN_CPDMA_SRC_OFFSET);
2827                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2828                                                         QMAN_CPDMA_DST_OFFSET);
2829         } else {
2830                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2831                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2832                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2833
2834                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2835                                                         QMAN_LDMA_SIZE_OFFSET);
2836                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2837                                                         QMAN_LDMA_SRC_OFFSET);
2838                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2839                                                         QMAN_LDMA_DST_OFFSET);
2840
2841                 /* Configure RAZWI IRQ */
2842                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2843                 if (hdev->stop_on_err)
2844                         dma_qm_err_cfg |=
2845                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2846
2847                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2848
2849                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2850                         lower_32_bits(CFG_BASE + irq_handler_offset));
2851                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2852                         upper_32_bits(CFG_BASE + irq_handler_offset));
2853
2854                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2855                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2856                                                                         dma_id);
2857
2858                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2859                                 QM_ARB_ERR_MSG_EN_MASK);
2860
2861                 /* Set timeout to maximum */
2862                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2863
2864                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2865                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2866                                 QMAN_INTERNAL_MAKE_TRUSTED);
2867         }
2868
2869         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2870         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2871         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2872         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2873
2874         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2875         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2876                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2877                                 mtr_base_ws_lo);
2878                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2879                                 mtr_base_ws_hi);
2880                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2881                                 so_base_ws_lo);
2882                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2883                                 so_base_ws_hi);
2884         }
2885 }
2886
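/*
 * Initialize all HBM DMA channels: program the four upper-CP streams of
 * each QMAN with their internal queue addresses, initialize the lower CP
 * and the DMA core, and enable the QMAN.
 */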
2887 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2888 {
2889         struct gaudi_device *gaudi = hdev->asic_specific;
2890         struct gaudi_internal_qman_info *q;
2891         u64 qman_base_addr;
2892         int i, j, dma_id, internal_q_index;
2893
2894         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2895                 return;
2896
2897         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2898                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2899
2900                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2901                          /*
2902                           * Add the CPU queue in order to get the correct queue
2903                           * number, as all internal queues are placed after it
2904                           */
2905                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2906
2907                         q = &gaudi->internal_qmans[internal_q_index];
2908                         qman_base_addr = (u64) q->pq_dma_addr;
2909                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2910                                                 qman_base_addr);
2911                 }
2912
2913                 /* Initializing lower CP for HBM DMA QMAN */
2914                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2915
2916                 gaudi_init_dma_core(hdev, dma_id);
2917
2918                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2919         }
2920
2921         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2922 }
2923
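/*
 * Configure a single stream of an MME QMAN. Streams 0-3 get an internal PQ
 * and CPDMA offsets; stream 4 is the lower CP, for which the RAZWI/error
 * reporting and arbiter watchdog are programmed. mme_offset selects which
 * MME QMAN (MME0 or MME2) is being configured.
 */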
2924 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2925                                         int qman_id, u64 qman_base_addr)
2926 {
2927         struct cpu_dyn_regs *dyn_regs =
2928                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2929         u32 mtr_base_lo, mtr_base_hi;
2930         u32 so_base_lo, so_base_hi;
2931         u32 irq_handler_offset;
2932         u32 q_off, mme_id;
2933         u32 mme_qm_err_cfg;
2934
2935         mtr_base_lo = lower_32_bits(CFG_BASE +
2936                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2937         mtr_base_hi = upper_32_bits(CFG_BASE +
2938                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2939         so_base_lo = lower_32_bits(CFG_BASE +
2940                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2941         so_base_hi = upper_32_bits(CFG_BASE +
2942                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2943
2944         q_off = mme_offset + qman_id * 4;
2945
2946         if (qman_id < 4) {
2947                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2948                                         lower_32_bits(qman_base_addr));
2949                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2950                                         upper_32_bits(qman_base_addr));
2951
2952                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2953                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2954                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2955
2956                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2957                                                         QMAN_CPDMA_SIZE_OFFSET);
2958                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2959                                                         QMAN_CPDMA_SRC_OFFSET);
2960                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2961                                                         QMAN_CPDMA_DST_OFFSET);
2962         } else {
2963                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2964                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2965                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2966
2967                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2968                                                         QMAN_LDMA_SIZE_OFFSET);
2969                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2970                                                         QMAN_LDMA_SRC_OFFSET);
2971                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2972                                                         QMAN_LDMA_DST_OFFSET);
2973
2974                 /* Configure RAZWI IRQ */
2975                 mme_id = mme_offset /
2976                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2977
2978                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2979                 if (hdev->stop_on_err)
2980                         mme_qm_err_cfg |=
2981                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2982
2983                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2984
2985                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2986                         lower_32_bits(CFG_BASE + irq_handler_offset));
2987                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2988                         upper_32_bits(CFG_BASE + irq_handler_offset));
2989
2990                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2991                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2992                                                                         mme_id);
2993
2994                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2995                                 QM_ARB_ERR_MSG_EN_MASK);
2996
2997                 /* Set timeout to maximum */
2998                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
2999
3000                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3001                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3002                                 QMAN_INTERNAL_MAKE_TRUSTED);
3003         }
3004
3005         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3006         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3007         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3008         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3009 }
3010
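/*
 * Initialize the MME QMANs: bind the four upper-CP streams of each QMAN to
 * the internal MME queues, initialize the lower CPs and enable both QMANs
 * (MME0 and MME2).
 */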
3011 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3012 {
3013         struct gaudi_device *gaudi = hdev->asic_specific;
3014         struct gaudi_internal_qman_info *q;
3015         u64 qman_base_addr;
3016         u32 mme_offset;
3017         int i, internal_q_index;
3018
3019         if (gaudi->hw_cap_initialized & HW_CAP_MME)
3020                 return;
3021
3022         /*
3023          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3024          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3025          */
3026
3027         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3028
3029         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3030                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3031                 q = &gaudi->internal_qmans[internal_q_index];
3032                 qman_base_addr = (u64) q->pq_dma_addr;
3033                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3034                                         qman_base_addr);
3035                 if (i == 3)
3036                         mme_offset = 0;
3037         }
3038
3039         /* Initializing lower CP for MME QMANs */
3040         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3041         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3042         gaudi_init_mme_qman(hdev, 0, 4, 0);
3043
3044         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3045         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3046
3047         gaudi->hw_cap_initialized |= HW_CAP_MME;
3048 }
3049
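/*
 * Configure a single stream of a TPC QMAN, following the same pattern as
 * the DMA/MME QMANs: streams 0-3 get an internal PQ and CPDMA offsets,
 * stream 4 is the lower CP with the per-QMAN error reporting, arbiter
 * watchdog and protection setup.
 */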
3050 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3051                                 int qman_id, u64 qman_base_addr)
3052 {
3053         struct cpu_dyn_regs *dyn_regs =
3054                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3055         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3056         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3057         u32 tpc_qm_err_cfg, irq_handler_offset;
3058         u32 q_off, tpc_id;
3059
3060         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3061                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3062         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3063                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3064         so_base_en_lo = lower_32_bits(CFG_BASE +
3065                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3066         so_base_en_hi = upper_32_bits(CFG_BASE +
3067                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3068         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3069                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3070         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3071                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3072         so_base_ws_lo = lower_32_bits(CFG_BASE +
3073                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3074         so_base_ws_hi = upper_32_bits(CFG_BASE +
3075                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3076
3077         q_off = tpc_offset + qman_id * 4;
3078
3079         tpc_id = tpc_offset /
3080                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3081
3082         if (qman_id < 4) {
3083                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3084                                         lower_32_bits(qman_base_addr));
3085                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3086                                         upper_32_bits(qman_base_addr));
3087
3088                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3089                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3090                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3091
3092                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3093                                                         QMAN_CPDMA_SIZE_OFFSET);
3094                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3095                                                         QMAN_CPDMA_SRC_OFFSET);
3096                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3097                                                         QMAN_CPDMA_DST_OFFSET);
3098         } else {
3099                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3100                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3101                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3102
3103                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3104                                                         QMAN_LDMA_SIZE_OFFSET);
3105                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3106                                                         QMAN_LDMA_SRC_OFFSET);
3107                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3108                                                         QMAN_LDMA_DST_OFFSET);
3109
3110                 /* Configure RAZWI IRQ */
3111                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3112                 if (hdev->stop_on_err)
3113                         tpc_qm_err_cfg |=
3114                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3115
3116                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3117
3118                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3119                         lower_32_bits(CFG_BASE + irq_handler_offset));
3120                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3121                         upper_32_bits(CFG_BASE + irq_handler_offset));
3122
3123                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3124                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3125                                                                         tpc_id);
3126
3127                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3128                                 QM_ARB_ERR_MSG_EN_MASK);
3129
3130                 /* Set timeout to maximum */
3131                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3132
3133                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3134                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3135                                 QMAN_INTERNAL_MAKE_TRUSTED);
3136         }
3137
3138         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3139         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3140         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3141         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3142
3143         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3144         if (tpc_id == 6) {
3145                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3146                                 mtr_base_ws_lo);
3147                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3148                                 mtr_base_ws_hi);
3149                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3150                                 so_base_ws_lo);
3151                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3152                                 so_base_ws_hi);
3153         }
3154 }
3155
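/*
 * Initialize all TPC QMANs: bind each of the four streams of every TPC to
 * its internal queue, initialize the lower CP, enable the QMAN/TPC channel
 * and set the sync manager base address in the TPC configuration space.
 */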
3156 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3157 {
3158         struct gaudi_device *gaudi = hdev->asic_specific;
3159         struct gaudi_internal_qman_info *q;
3160         u64 qman_base_addr;
3161         u32 so_base_hi, tpc_offset = 0;
3162         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3163                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3164         int i, tpc_id, internal_q_index;
3165
3166         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3167                 return;
3168
3169         so_base_hi = upper_32_bits(CFG_BASE +
3170                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3171
3172         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3173                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3174                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3175                                                 tpc_id * QMAN_STREAMS + i;
3176                         q = &gaudi->internal_qmans[internal_q_index];
3177                         qman_base_addr = (u64) q->pq_dma_addr;
3178                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3179                                                 qman_base_addr);
3180
3181                         if (i == 3) {
3182                                 /* Initializing lower CP for TPC QMAN */
3183                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3184
3185                                 /* Enable the QMAN and TPC channel */
3186                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3187                                                 QMAN_TPC_ENABLE);
3188                         }
3189                 }
3190
3191                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3192                                 so_base_hi);
3193
3194                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3195
3196                 gaudi->hw_cap_initialized |=
3197                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3198         }
3199 }
3200
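/*
 * Configure a single stream of a NIC QMAN: PQ base/size, LDMA offsets and
 * sync manager message bases. The per-QMAN error reporting and arbiter
 * settings are programmed only for stream 0.
 */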
3201 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3202                                 int qman_id, u64 qman_base_addr, int nic_id)
3203 {
3204         struct cpu_dyn_regs *dyn_regs =
3205                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3206         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3207         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3208         u32 nic_qm_err_cfg, irq_handler_offset;
3209         u32 q_off;
3210
3211         mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3212                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3213         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3214                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3215         so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3216                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3217         so_base_en_hi = upper_32_bits(CFG_BASE +
3218                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3219         mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3220                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3221         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3222                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3223         so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3224                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3225         so_base_ws_hi = upper_32_bits(CFG_BASE +
3226                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3227
3228         q_off = nic_offset + qman_id * 4;
3229
3230         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3231         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3232
3233         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3234         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3235         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3236
3237         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3238                                                         QMAN_LDMA_SIZE_OFFSET);
3239         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3240                                                         QMAN_LDMA_SRC_OFFSET);
3241         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3242                                                         QMAN_LDMA_DST_OFFSET);
3243
3244         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3245         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3246         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3247         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3248
3249         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3250         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3251         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3252         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3253         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3254
3255         if (qman_id == 0) {
3256                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3257                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3258                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3259
3260                 /* Configure RAZWI IRQ */
3261                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3262                 if (hdev->stop_on_err)
3263                         nic_qm_err_cfg |=
3264                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3265
3266                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3267
3268                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3269                         lower_32_bits(CFG_BASE + irq_handler_offset));
3270                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3271                         upper_32_bits(CFG_BASE + irq_handler_offset));
3272
3273                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3274                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3275                                                                         nic_id);
3276
3277                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3278                                 QM_ARB_ERR_MSG_EN_MASK);
3279
3280                 /* Set timeout to maximum */
3281                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3282
3283                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3284                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3285                                 QMAN_INTERNAL_MAKE_TRUSTED);
3286         }
3287 }
3288
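/*
 * Initialize the QMANs of all NIC ports that are enabled in nic_ports_mask.
 * Each NIC block exposes two QMANs (QM0/QM1), so the register offset
 * advances by one QMAN per port and jumps to the next NIC block after every
 * odd port.
 */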
3289 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3290 {
3291         struct gaudi_device *gaudi = hdev->asic_specific;
3292         struct gaudi_internal_qman_info *q;
3293         u64 qman_base_addr;
3294         u32 nic_offset = 0;
3295         u32 nic_delta_between_qmans =
3296                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3297         u32 nic_delta_between_nics =
3298                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3299         int i, nic_id, internal_q_index;
3300
3301         if (!hdev->nic_ports_mask)
3302                 return;
3303
3304         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3305                 return;
3306
3307         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3308
3309         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3310                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3311                         nic_offset += nic_delta_between_qmans;
3312                         if (nic_id & 1) {
3313                                 nic_offset -= (nic_delta_between_qmans * 2);
3314                                 nic_offset += nic_delta_between_nics;
3315                         }
3316                         continue;
3317                 }
3318
3319                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3320                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3321                                                 nic_id * QMAN_STREAMS + i;
3322                         q = &gaudi->internal_qmans[internal_q_index];
3323                         qman_base_addr = (u64) q->pq_dma_addr;
3324                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3325                                                 qman_base_addr, nic_id);
3326                 }
3327
3328                 /* Enable the QMAN */
3329                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3330
3331                 nic_offset += nic_delta_between_qmans;
3332                 if (nic_id & 1) {
3333                         nic_offset -= (nic_delta_between_qmans * 2);
3334                         nic_offset += nic_delta_between_nics;
3335                 }
3336
3337                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3338         }
3339 }
3340
3341 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3342 {
3343         struct gaudi_device *gaudi = hdev->asic_specific;
3344
3345         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3346                 return;
3347
3348         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3349         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3350         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3351 }
3352
3353 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3354 {
3355         struct gaudi_device *gaudi = hdev->asic_specific;
3356
3357         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3358                 return;
3359
3360         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3361         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3362         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3363         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3364         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3365 }
3366
3367 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3368 {
3369         struct gaudi_device *gaudi = hdev->asic_specific;
3370
3371         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3372                 return;
3373
3374         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3375         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3376 }
3377
3378 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3379 {
3380         struct gaudi_device *gaudi = hdev->asic_specific;
3381         u32 tpc_offset = 0;
3382         int tpc_id;
3383
3384         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3385                 return;
3386
3387         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3388                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3389                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3390         }
3391 }
3392
3393 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3394 {
3395         struct gaudi_device *gaudi = hdev->asic_specific;
3396         u32 nic_mask, nic_offset = 0;
3397         u32 nic_delta_between_qmans =
3398                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3399         u32 nic_delta_between_nics =
3400                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3401         int nic_id;
3402
3403         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3404                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3405
3406                 if (gaudi->hw_cap_initialized & nic_mask)
3407                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3408
3409                 nic_offset += nic_delta_between_qmans;
3410                 if (nic_id & 1) {
3411                         nic_offset -= (nic_delta_between_qmans * 2);
3412                         nic_offset += nic_delta_between_nics;
3413                 }
3414         }
3415 }
3416
3417 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3418 {
3419         struct gaudi_device *gaudi = hdev->asic_specific;
3420
3421         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3422                 return;
3423
3424         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3425         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3426         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3427         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3428 }
3429
3430 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3431 {
3432         struct gaudi_device *gaudi = hdev->asic_specific;
3433
3434         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3435                 return;
3436
3437         /* Stop CPs of HBM DMA QMANs */
3438
3439         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3440         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3441         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3442         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3443         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3444 }
3445
3446 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3447 {
3448         struct gaudi_device *gaudi = hdev->asic_specific;
3449
3450         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3451                 return;
3452
3453         /* Stop CPs of MME QMANs */
3454         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3455         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3456 }
3457
3458 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3459 {
3460         struct gaudi_device *gaudi = hdev->asic_specific;
3461
3462         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3463                 return;
3464
3465         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3466         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3467         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3468         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3469         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3470         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3471         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3472         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3473 }
3474
3475 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3476 {
3477         struct gaudi_device *gaudi = hdev->asic_specific;
3478
3479         /* Stop upper CPs of QMANs */
3480
3481         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3482                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3483                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3484                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3485                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3486
3487         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3488                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3489                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3490                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3491                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3492
3493         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3494                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3495                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3496                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3497                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3498
3499         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3500                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3501                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3502                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3503                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3504
3505         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3506                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3507                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3508                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3509                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3510
3511         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3512                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3513                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3514                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3515                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3516
3517         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3518                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3519                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3520                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3521                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3522
3523         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3524                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3525                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3526                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3527                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3528
3529         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3530                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3531                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3532                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3533                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3534
3535         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3536                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3537                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3538                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3539                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3540 }
3541
3542 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3543 {
3544         struct gaudi_device *gaudi = hdev->asic_specific;
3545
3546         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3547                 return;
3548
3549         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3550         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3551         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3552 }
3553
3554 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3555 {
3556         struct gaudi_device *gaudi = hdev->asic_specific;
3557
3558         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3559                 return;
3560
3561         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3562         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3563         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3564         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3565         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3566 }
3567
3568 static void gaudi_mme_stall(struct hl_device *hdev)
3569 {
3570         struct gaudi_device *gaudi = hdev->asic_specific;
3571
3572         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3573                 return;
3574
3575         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3576         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3577         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3578         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3579         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3580         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3581         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3582         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3583         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3584         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3585         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3586         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3587         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3588         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3589         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3590         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3591         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3592 }
3593
3594 static void gaudi_tpc_stall(struct hl_device *hdev)
3595 {
3596         struct gaudi_device *gaudi = hdev->asic_specific;
3597
3598         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3599                 return;
3600
3601         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3602         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3603         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3604         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3605         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3606         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3607         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3608         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3609 }
3610
3611 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3612 {
3613         u32 qman_offset;
3614         int i;
3615
3616         if (hdev->asic_prop.fw_security_enabled)
3617                 return;
3618
3619         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3620                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3621                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3622
3623                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3624         }
3625
3626         WREG32(mmMME0_QM_CGM_CFG, 0);
3627         WREG32(mmMME0_QM_CGM_CFG1, 0);
3628         WREG32(mmMME2_QM_CGM_CFG, 0);
3629         WREG32(mmMME2_QM_CGM_CFG1, 0);
3630
3631         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3632                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3633                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3634
3635                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3636         }
3637 }
3638
3639 static void gaudi_enable_timestamp(struct hl_device *hdev)
3640 {
3641         /* Disable the timestamp counter */
3642         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3643
3644         /* Zero the lower/upper parts of the 64-bit counter */
3645         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3646         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3647
3648         /* Enable the counter */
3649         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3650 }
3651
3652 static void gaudi_disable_timestamp(struct hl_device *hdev)
3653 {
3654         /* Disable the timestamp counter */
3655         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3656 }
3657
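/*
 * Quiesce all engines before reset: stop the QMAN CPs, wait, stall the
 * engines themselves, wait again, then disable the QMANs, the timestamp
 * counter and MSI. When the reset is performed by FW, only MSI is disabled.
 */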
3658 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3659 {
3660         u32 wait_timeout_ms;
3661
3662         if (hdev->pldm)
3663                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3664         else
3665                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3666
3667         if (fw_reset)
3668                 goto skip_engines;
3669
3670         gaudi_stop_nic_qmans(hdev);
3671         gaudi_stop_mme_qmans(hdev);
3672         gaudi_stop_tpc_qmans(hdev);
3673         gaudi_stop_hbm_dma_qmans(hdev);
3674         gaudi_stop_pci_dma_qmans(hdev);
3675
3676         msleep(wait_timeout_ms);
3677
3678         gaudi_pci_dma_stall(hdev);
3679         gaudi_hbm_dma_stall(hdev);
3680         gaudi_tpc_stall(hdev);
3681         gaudi_mme_stall(hdev);
3682
3683         msleep(wait_timeout_ms);
3684
3685         gaudi_disable_nic_qmans(hdev);
3686         gaudi_disable_mme_qmans(hdev);
3687         gaudi_disable_tpc_qmans(hdev);
3688         gaudi_disable_hbm_dma_qmans(hdev);
3689         gaudi_disable_pci_dma_qmans(hdev);
3690
3691         gaudi_disable_timestamp(hdev);
3692
3693 skip_engines:
3694         gaudi_disable_msi(hdev);
3695 }
3696
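/*
 * Initialize the device MMU: program the hop-0 table address for every
 * ASID, set up the STLB cache-invalidation management area, invalidate the
 * MMU cache and enable the MMU with its hop configuration.
 */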
3697 static int gaudi_mmu_init(struct hl_device *hdev)
3698 {
3699         struct asic_fixed_properties *prop = &hdev->asic_prop;
3700         struct gaudi_device *gaudi = hdev->asic_specific;
3701         u64 hop0_addr;
3702         int rc, i;
3703
3704         if (!hdev->mmu_enable)
3705                 return 0;
3706
3707         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3708                 return 0;
3709
3710         for (i = 0 ; i < prop->max_asid ; i++) {
3711                 hop0_addr = prop->mmu_pgt_addr +
3712                                 (i * prop->mmu_hop_table_size);
3713
3714                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3715                 if (rc) {
3716                         dev_err(hdev->dev,
3717                                 "failed to set hop0 addr for asid %d\n", i);
3718                         goto err;
3719                 }
3720         }
3721
3722         /* init MMU cache manage page */
3723         WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3724         WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3725
3726         /* mem cache invalidation */
3727         WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3728
3729         hl_mmu_invalidate_cache(hdev, true, 0);
3730
3731         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3732         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3733
3734         WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3735
3736         /*
3737          * The H/W expects the first PI after init to be 1. After wraparound
3738          * we'll write 0.
3739          */
3740         gaudi->mmu_cache_inv_pi = 1;
3741
3742         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3743
3744         return 0;
3745
3746 err:
3747         return rc;
3748 }
3749
3750 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3751 {
3752         void __iomem *dst;
3753
3754         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3755
3756         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3757 }
3758
3759 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3760 {
3761         void __iomem *dst;
3762
3763         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3764
3765         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3766 }
3767
3768 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3769 {
3770         struct dynamic_fw_load_mgr *dynamic_loader;
3771         struct cpu_dyn_regs *dyn_regs;
3772
3773         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3774
3775         /*
3776          * Set initial values for a few specific dynamic registers. Before
3777          * the first descriptor is read from the FW, these values must be
3778          * hard-coded. In later stages of the protocol they are updated
3779          * automatically by reading the FW descriptor, so the data there
3780          * will always be up-to-date.
3781          */
3782         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3783         dyn_regs->kmd_msg_to_cpu =
3784                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3785         dyn_regs->cpu_cmd_status_to_host =
3786                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3787
3788         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3789 }
3790
3791 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3792 {
3793         struct static_fw_load_mgr *static_loader;
3794
3795         static_loader = &hdev->fw_loader.static_loader;
3796
3797         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3798         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3799         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3800         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3801         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3802         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3803         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3804         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3805         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3806         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3807         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3808         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3809         static_loader->cpu_reset_wait_msec = hdev->pldm ?
3810                         GAUDI_PLDM_RESET_WAIT_MSEC :
3811                         GAUDI_CPU_RESET_WAIT_MSEC;
3812 }
3813
3814 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3815 {
3816         struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3817
3818         pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3819         pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3820         pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3821         pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3822         pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3823         pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3824 }
3825
3826 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3827 {
3828         struct asic_fixed_properties *prop = &hdev->asic_prop;
3829         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3830
3831         /* fill common fields */
3832         fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3833         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3834         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3835         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3836         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3837         fw_loader->skip_bmc = !hdev->bmc_enable;
3838         fw_loader->sram_bar_id = SRAM_BAR_ID;
3839         fw_loader->dram_bar_id = HBM_BAR_ID;
3840
3841         if (prop->dynamic_fw_load)
3842                 gaudi_init_dynamic_firmware_loader(hdev);
3843         else
3844                 gaudi_init_static_firmware_loader(hdev);
3845 }
3846
3847 static int gaudi_init_cpu(struct hl_device *hdev)
3848 {
3849         struct gaudi_device *gaudi = hdev->asic_specific;
3850         int rc;
3851
3852         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3853                 return 0;
3854
3855         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3856                 return 0;
3857
3858         /*
3859          * The device CPU works with 40-bit addresses.
3860          * This register sets the extension to 50 bits.
3861          */
3862         if (!hdev->asic_prop.fw_security_enabled)
3863                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3864
3865         rc = hl_fw_init_cpu(hdev);
3866
3867         if (rc)
3868                 return rc;
3869
3870         gaudi->hw_cap_initialized |= HW_CAP_CPU;
3871
3872         return 0;
3873 }
3874
3875 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3876 {
3877         struct cpu_dyn_regs *dyn_regs =
3878                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3879         struct asic_fixed_properties *prop = &hdev->asic_prop;
3880         struct gaudi_device *gaudi = hdev->asic_specific;
3881         u32 status, irq_handler_offset;
3882         struct hl_eq *eq;
3883         struct hl_hw_queue *cpu_pq =
3884                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3885         int err;
3886
3887         if (!hdev->cpu_queues_enable)
3888                 return 0;
3889
3890         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3891                 return 0;
3892
3893         eq = &hdev->event_queue;
3894
3895         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3896         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3897
3898         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3899         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3900
3901         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3902                         lower_32_bits(hdev->cpu_accessible_dma_address));
3903         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3904                         upper_32_bits(hdev->cpu_accessible_dma_address));
3905
3906         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3907         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3908         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3909
3910         /* Used for EQ CI */
3911         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3912
3913         WREG32(mmCPU_IF_PF_PQ_PI, 0);
3914
3915         if (gaudi->multi_msi_mode)
3916                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3917         else
3918                 WREG32(mmCPU_IF_QUEUE_INIT,
3919                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3920
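             /*
              * Notify the device CPU (via the GIC or the FW-provided IRQ
              * register) of the PI update, so it processes the queue
              * configuration written above and reports READY_FOR_HOST.
              */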
3921         irq_handler_offset = prop->gic_interrupts_enable ?
3922                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3923                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3924
3925         WREG32(irq_handler_offset,
3926                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3927
3928         err = hl_poll_timeout(
3929                 hdev,
3930                 mmCPU_IF_QUEUE_INIT,
3931                 status,
3932                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3933                 1000,
3934                 cpu_timeout);
3935
3936         if (err) {
3937                 dev_err(hdev->dev,
3938                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3939                 return -EIO;
3940         }
3941
3942         /* update FW application security bits */
3943         if (prop->fw_cpu_boot_dev_sts0_valid)
3944                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3945         if (prop->fw_cpu_boot_dev_sts1_valid)
3946                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3947
3948         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3949         return 0;
3950 }
3951
3952 static void gaudi_pre_hw_init(struct hl_device *hdev)
3953 {
3954         /* Perform read from the device to make sure device is up */
3955         RREG32(mmHW_STATE);
3956
3957         if (!hdev->asic_prop.fw_security_enabled) {
3958                 /* Set the access through PCI bars (Linux driver only) as
3959                  * secured
3960                  */
3961                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3962                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3963                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3964
3965                 /* Perform read to flush the waiting writes to ensure
3966                  * configuration was set in the device
3967                  */
3968                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3969         }
3970
3971         /*
3972          * Let's mark in the H/W that we have reached this point. We check
3973          * this value in the reset_before_init function to understand whether
3974          * we need to reset the chip before doing H/W init. This register is
3975          * cleared by the H/W upon H/W reset
3976          */
3977         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3978 }
3979
3980 static int gaudi_hw_init(struct hl_device *hdev)
3981 {
3982         struct gaudi_device *gaudi = hdev->asic_specific;
3983         int rc;
3984
3985         gaudi_pre_hw_init(hdev);
3986
3987         /* If the iATU configuration is done by the FW, the HBM BAR ALWAYS
3988          * points to DRAM_PHYS_BASE. So we set it here, and if anyone tries
3989          * to move it later to a different address, there will be an error
3990          */
3991         if (hdev->asic_prop.iatu_done_by_fw)
3992                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3993
3994         /*
3995          * Before pushing u-boot/Linux to the device, the HBM BAR must be set
3996          * to the base address of the DRAM
3997          */
3998         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
3999                 dev_err(hdev->dev,
4000                         "failed to map HBM bar to DRAM base address\n");
4001                 return -EIO;
4002         }
4003
4004         rc = gaudi_init_cpu(hdev);
4005         if (rc) {
4006                 dev_err(hdev->dev, "failed to initialize CPU\n");
4007                 return rc;
4008         }
4009
4010         /* In case the clock gating was enabled in preboot we need to disable
4011          * it here before touching the MME/TPC registers.
4012          */
4013         gaudi_disable_clock_gating(hdev);
4014
4015         /* SRAM scrambler must be initialized after CPU is running from HBM */
4016         gaudi_init_scrambler_sram(hdev);
4017
4018         /* This is here just in case we are working without CPU */
4019         gaudi_init_scrambler_hbm(hdev);
4020
4021         gaudi_init_golden_registers(hdev);
4022
4023         rc = gaudi_mmu_init(hdev);
4024         if (rc)
4025                 return rc;
4026
4027         gaudi_init_security(hdev);
4028
4029         gaudi_init_pci_dma_qmans(hdev);
4030
4031         gaudi_init_hbm_dma_qmans(hdev);
4032
4033         gaudi_init_mme_qmans(hdev);
4034
4035         gaudi_init_tpc_qmans(hdev);
4036
4037         gaudi_init_nic_qmans(hdev);
4038
4039         gaudi_enable_timestamp(hdev);
4040
4041         /* MSI must be enabled before CPU queues and NIC are initialized */
4042         rc = gaudi_enable_msi(hdev);
4043         if (rc)
4044                 goto disable_queues;
4045
4046         /* must be called after MSI was enabled */
4047         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4048         if (rc) {
4049                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4050                         rc);
4051                 goto disable_msi;
4052         }
4053
4054         /* Perform read from the device to flush all configuration */
4055         RREG32(mmHW_STATE);
4056
4057         return 0;
4058
4059 disable_msi:
4060         gaudi_disable_msi(hdev);
4061 disable_queues:
4062         gaudi_disable_mme_qmans(hdev);
4063         gaudi_disable_pci_dma_qmans(hdev);
4064
4065         return rc;
4066 }
4067
4068 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4069 {
4070         struct cpu_dyn_regs *dyn_regs =
4071                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4072         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4073         struct gaudi_device *gaudi = hdev->asic_specific;
4074         bool driver_performs_reset;
4075
4076         if (!hard_reset) {
4077                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4078                 return;
4079         }
4080
4081         if (hdev->pldm) {
4082                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4083                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4084         } else {
4085                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4086                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4087         }
4088
4089         if (fw_reset) {
4090                 dev_dbg(hdev->dev,
4091                         "Firmware performs HARD reset, going to wait %dms\n",
4092                         reset_timeout_ms);
4093
4094                 goto skip_reset;
4095         }
4096
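             /*
              * The driver performs the reset itself only when FW security is
              * disabled and the FW is not responsible for the hard reset.
              */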
4097         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4098                                         !hdev->asic_prop.hard_reset_done_by_fw);
4099
4100         /* Set the device to handle FLR by H/W, as we are going to put the
4101          * device CPU into halt mode
4102          */
4103         if (driver_performs_reset)
4104                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4105                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4106
4107         /* If Linux is loaded on the device CPU, we need to communicate with
4108          * it via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4109          * registers in case of old F/Ws
4110          */
4111         if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4112                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4113                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4114                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4115
4116                 WREG32(irq_handler_offset,
4117                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4118
4119                 /* This is a hail-mary attempt to revive the card in the small chance that the
4120                  * f/w has experienced a watchdog event, which caused it to return to preboot.
4121                  * In that case, triggering the reset through the GIC won't help. We need to
4122                  * trigger the reset as if Linux wasn't loaded.
4123                  *
4124                  * We do it only if the reset cause was a heartbeat (HB) failure, because that
4125                  * would be the indication of such an event.
4126                  *
4127                  * If the watchdog hasn't expired but we still got an HB failure, this won't do
4128                  * any damage.
4129                  */
4130                 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4131                         if (hdev->asic_prop.hard_reset_done_by_fw)
4132                                 hl_fw_ask_hard_reset_without_linux(hdev);
4133                         else
4134                                 hl_fw_ask_halt_machine_without_linux(hdev);
4135                 }
4136         } else {
4137                 if (hdev->asic_prop.hard_reset_done_by_fw)
4138                         hl_fw_ask_hard_reset_without_linux(hdev);
4139                 else
4140                         hl_fw_ask_halt_machine_without_linux(hdev);
4141         }
4142
4143         if (driver_performs_reset) {
4144
4145                 /* Configure the reset registers. Must be done as early as
4146                  * possible in case we fail during H/W initialization
4147                  */
4148                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4149                                                 (CFG_RST_H_DMA_MASK |
4150                                                 CFG_RST_H_MME_MASK |
4151                                                 CFG_RST_H_SM_MASK |
4152                                                 CFG_RST_H_TPC_7_MASK));
4153
4154                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4155
4156                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4157                                                 (CFG_RST_H_HBM_MASK |
4158                                                 CFG_RST_H_TPC_7_MASK |
4159                                                 CFG_RST_H_NIC_MASK |
4160                                                 CFG_RST_H_SM_MASK |
4161                                                 CFG_RST_H_DMA_MASK |
4162                                                 CFG_RST_H_MME_MASK |
4163                                                 CFG_RST_H_CPU_MASK |
4164                                                 CFG_RST_H_MMU_MASK));
4165
4166                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4167                                                 (CFG_RST_L_IF_MASK |
4168                                                 CFG_RST_L_PSOC_MASK |
4169                                                 CFG_RST_L_TPC_MASK));
4170
4171                 msleep(cpu_timeout_ms);
4172
4173                 /* Tell ASIC not to re-initialize PCIe */
4174                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4175
4176                 /* Restart BTL/BLR upon hard-reset */
4177                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4178
4179                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4180                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4181
4182                 dev_dbg(hdev->dev,
4183                         "Issued HARD reset command, going to wait %dms\n",
4184                         reset_timeout_ms);
4185         } else {
4186                 dev_dbg(hdev->dev,
4187                         "Firmware performs HARD reset, going to wait %dms\n",
4188                         reset_timeout_ms);
4189         }
4190
4191 skip_reset:
4192         /*
4193          * After a hard reset, we can't poll the BTM_FSM register because the
4194          * PSOC itself is in reset. We need to wait until the reset is deasserted.
4195          */
4196         msleep(reset_timeout_ms);
4197
4198         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4199         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4200                 dev_err(hdev->dev,
4201                         "Timeout while waiting for device to reset 0x%x\n",
4202                         status);
4203
4204         if (gaudi) {
4205                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4206                                                 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4207                                                 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4208                                                 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4209                                                 HW_CAP_HBM_SCRAMBLER);
4210
4211                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4212
4213                 hdev->device_cpu_is_halted = false;
4214         }
4215 }
4216
4217 static int gaudi_suspend(struct hl_device *hdev)
4218 {
4219         int rc;
4220
4221         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4222         if (rc)
4223                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4224
4225         return rc;
4226 }
4227
4228 static int gaudi_resume(struct hl_device *hdev)
4229 {
4230         return gaudi_init_iatu(hdev);
4231 }
4232
4233 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4234                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4235 {
4236         int rc;
4237
4238         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4239                         VM_DONTCOPY | VM_NORESERVE;
4240
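             /*
              * The stored DMA address is the device's view of host memory;
              * subtract HOST_PHYS_BASE to get back the handle that the DMA
              * API expects.
              */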
4241         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4242                                 (dma_addr - HOST_PHYS_BASE), size);
4243         if (rc)
4244                 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4245
4246         return rc;
4247 }
4248
4249 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4250 {
4251         struct cpu_dyn_regs *dyn_regs =
4252                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4253         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4254         struct gaudi_device *gaudi = hdev->asic_specific;
4255         bool invalid_queue = false;
4256         int dma_id;
4257
4258         switch (hw_queue_id) {
4259         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4260                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4261                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4262                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4263                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4264                 break;
4265
4266         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4267                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4268                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4269                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4270                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4271                 break;
4272
4273         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4274                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4275                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
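                     /*
                      * The CPU PQ occupies one queue ID between the PCI DMA
                      * and HBM DMA queues, hence the "- 1" when extracting
                      * the index within the QMAN (same for DMA 3-7 below).
                      */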
4276                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4277                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4278                 break;
4279
4280         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4281                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4282                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4283                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4284                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4285                 break;
4286
4287         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4288                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4289                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4290                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4291                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4292                 break;
4293
4294         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4295                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4296                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4297                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4298                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4299                 break;
4300
4301         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4302                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4303                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4304                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4305                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4306                 break;
4307
4308         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4309                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4310                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4311                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4312                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4313                 break;
4314
4315         case GAUDI_QUEUE_ID_CPU_PQ:
4316                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4317                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4318                 else
4319                         invalid_queue = true;
4320                 break;
4321
4322         case GAUDI_QUEUE_ID_MME_0_0:
4323                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4324                 break;
4325
4326         case GAUDI_QUEUE_ID_MME_0_1:
4327                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4328                 break;
4329
4330         case GAUDI_QUEUE_ID_MME_0_2:
4331                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4332                 break;
4333
4334         case GAUDI_QUEUE_ID_MME_0_3:
4335                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4336                 break;
4337
4338         case GAUDI_QUEUE_ID_MME_1_0:
4339                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4340                 break;
4341
4342         case GAUDI_QUEUE_ID_MME_1_1:
4343                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4344                 break;
4345
4346         case GAUDI_QUEUE_ID_MME_1_2:
4347                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4348                 break;
4349
4350         case GAUDI_QUEUE_ID_MME_1_3:
4351                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4352                 break;
4353
4354         case GAUDI_QUEUE_ID_TPC_0_0:
4355                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4356                 break;
4357
4358         case GAUDI_QUEUE_ID_TPC_0_1:
4359                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4360                 break;
4361
4362         case GAUDI_QUEUE_ID_TPC_0_2:
4363                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4364                 break;
4365
4366         case GAUDI_QUEUE_ID_TPC_0_3:
4367                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4368                 break;
4369
4370         case GAUDI_QUEUE_ID_TPC_1_0:
4371                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4372                 break;
4373
4374         case GAUDI_QUEUE_ID_TPC_1_1:
4375                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4376                 break;
4377
4378         case GAUDI_QUEUE_ID_TPC_1_2:
4379                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4380                 break;
4381
4382         case GAUDI_QUEUE_ID_TPC_1_3:
4383                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4384                 break;
4385
4386         case GAUDI_QUEUE_ID_TPC_2_0:
4387                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4388                 break;
4389
4390         case GAUDI_QUEUE_ID_TPC_2_1:
4391                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4392                 break;
4393
4394         case GAUDI_QUEUE_ID_TPC_2_2:
4395                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4396                 break;
4397
4398         case GAUDI_QUEUE_ID_TPC_2_3:
4399                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4400                 break;
4401
4402         case GAUDI_QUEUE_ID_TPC_3_0:
4403                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4404                 break;
4405
4406         case GAUDI_QUEUE_ID_TPC_3_1:
4407                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4408                 break;
4409
4410         case GAUDI_QUEUE_ID_TPC_3_2:
4411                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4412                 break;
4413
4414         case GAUDI_QUEUE_ID_TPC_3_3:
4415                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4416                 break;
4417
4418         case GAUDI_QUEUE_ID_TPC_4_0:
4419                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4420                 break;
4421
4422         case GAUDI_QUEUE_ID_TPC_4_1:
4423                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4424                 break;
4425
4426         case GAUDI_QUEUE_ID_TPC_4_2:
4427                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4428                 break;
4429
4430         case GAUDI_QUEUE_ID_TPC_4_3:
4431                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4432                 break;
4433
4434         case GAUDI_QUEUE_ID_TPC_5_0:
4435                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4436                 break;
4437
4438         case GAUDI_QUEUE_ID_TPC_5_1:
4439                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4440                 break;
4441
4442         case GAUDI_QUEUE_ID_TPC_5_2:
4443                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4444                 break;
4445
4446         case GAUDI_QUEUE_ID_TPC_5_3:
4447                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4448                 break;
4449
4450         case GAUDI_QUEUE_ID_TPC_6_0:
4451                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4452                 break;
4453
4454         case GAUDI_QUEUE_ID_TPC_6_1:
4455                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4456                 break;
4457
4458         case GAUDI_QUEUE_ID_TPC_6_2:
4459                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4460                 break;
4461
4462         case GAUDI_QUEUE_ID_TPC_6_3:
4463                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4464                 break;
4465
4466         case GAUDI_QUEUE_ID_TPC_7_0:
4467                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4468                 break;
4469
4470         case GAUDI_QUEUE_ID_TPC_7_1:
4471                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4472                 break;
4473
4474         case GAUDI_QUEUE_ID_TPC_7_2:
4475                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4476                 break;
4477
4478         case GAUDI_QUEUE_ID_TPC_7_3:
4479                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4480                 break;
4481
4482         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4483                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4484                         invalid_queue = true;
4485
4486                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4487                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4488                 break;
4489
4490         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4491                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4492                         invalid_queue = true;
4493
4494                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4495                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4496                 break;
4497
4498         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4499                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4500                         invalid_queue = true;
4501
4502                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4503                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4504                 break;
4505
4506         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4507                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4508                         invalid_queue = true;
4509
4510                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4511                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4512                 break;
4513
4514         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4515                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4516                         invalid_queue = true;
4517
4518                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4519                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4520                 break;
4521
4522         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4523                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4524                         invalid_queue = true;
4525
4526                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4527                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4528                 break;
4529
4530         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4531                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4532                         invalid_queue = true;
4533
4534                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4535                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4536                 break;
4537
4538         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4539                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4540                         invalid_queue = true;
4541
4542                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4543                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4544                 break;
4545
4546         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4547                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4548                         invalid_queue = true;
4549
4550                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4551                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4552                 break;
4553
4554         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4555                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4556                         invalid_queue = true;
4557
4558                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4559                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4560                 break;
4561
4562         default:
4563                 invalid_queue = true;
4564         }
4565
4566         if (invalid_queue) {
4567                 /* Should never get here */
4568                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4569                         hw_queue_id);
4570                 return;
4571         }
4572
4573         db_value = pi;
4574
4575         /* ring the doorbell */
4576         WREG32(db_reg_offset, db_value);
4577
4578         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4579                 /* make sure device CPU will read latest data from host */
4580                 mb();
4581
4582                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4583                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4584                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4585
4586                 WREG32(irq_handler_offset,
4587                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4588         }
4589 }
4590
4591 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4592                                 struct hl_bd *bd)
4593 {
4594         __le64 *pbd = (__le64 *) bd;
4595
4596         /* The QMANs are on the host memory so a simple copy suffices */
4597         pqe[0] = pbd[0];
4598         pqe[1] = pbd[1];
4599 }
4600
4601 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4602                                         dma_addr_t *dma_handle, gfp_t flags)
4603 {
4604         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4605                                                 dma_handle, flags);
4606
4607         /* Shift to the device's base physical address of host memory */
4608         if (kernel_addr)
4609                 *dma_handle += HOST_PHYS_BASE;
4610
4611         return kernel_addr;
4612 }
4613
4614 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4615                 void *cpu_addr, dma_addr_t dma_handle)
4616 {
4617         /* Cancel the device's base physical address of host memory */
4618         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4619
4620         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4621 }
4622
4623 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4624 {
4625         struct asic_fixed_properties *prop = &hdev->asic_prop;
4626         u64 cur_addr = prop->dram_user_base_address;
4627         u32 chunk_size, busy;
4628         int rc, dma_id;
4629
4630         while (cur_addr < prop->dram_end_address) {
4631                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4632                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4633
4634                         chunk_size =
4635                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4636
4637                         dev_dbg(hdev->dev,
4638                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4639                                 cur_addr, cur_addr + chunk_size);
4640
4641                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4642                                         lower_32_bits(val));
4643                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4644                                         upper_32_bits(val));
4645                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4646                                                 lower_32_bits(cur_addr));
4647                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4648                                                 upper_32_bits(cur_addr));
4649                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4650                                         chunk_size);
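                             /*
                              * Commit in memset mode: the DMA core writes the
                              * pattern given in SRC_BASE to the destination
                              * range instead of copying from memory.
                              */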
4651                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4652                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4653                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4654
4655                         cur_addr += chunk_size;
4656
4657                         if (cur_addr == prop->dram_end_address)
4658                                 break;
4659                 }
4660
4661                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4662                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4663
4664                         rc = hl_poll_timeout(
4665                                 hdev,
4666                                 mmDMA0_CORE_STS0 + dma_offset,
4667                                 busy,
4668                                 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4669                                 1000,
4670                                 HBM_SCRUBBING_TIMEOUT_US);
4671
4672                         if (rc) {
4673                                 dev_err(hdev->dev,
4674                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4675                                         dma_id);
4676                                 return -EIO;
4677                         }
4678                 }
4679         }
4680
4681         return 0;
4682 }
4683
4684 static int gaudi_scrub_device_mem(struct hl_device *hdev)
4685 {
4686         struct asic_fixed_properties *prop = &hdev->asic_prop;
4687         u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
4688                         min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
4689         u64 addr, size, val = hdev->memory_scrub_val;
4690         ktime_t timeout;
4691         int rc = 0;
4692
4693         if (!hdev->memory_scrub)
4694                 return 0;
4695
4696         timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4697         while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4698                 if (ktime_compare(ktime_get(), timeout) > 0) {
4699                         dev_err(hdev->dev, "waiting for idle timeout\n");
4700                         return -ETIMEDOUT;
4701                 }
4702                 usleep_range((1000 >> 2) + 1, 1000);
4703         }
4704
4705         /* Scrub SRAM */
4706         addr = prop->sram_user_base_address;
4707         size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4708
4709         dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4710                         addr, addr + size, val);
4711         rc = gaudi_memset_device_memory(hdev, addr, size, val);
4712         if (rc) {
4713                 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4714                 return rc;
4715         }
4716
4717         /* Scrub HBM using all DMA channels in parallel */
4718         rc = gaudi_scrub_device_dram(hdev, val);
4719         if (rc) {
4720                 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4721                 return rc;
4722         }
4723
4724         return 0;
4725 }
4726
4727 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4728                                 u32 queue_id, dma_addr_t *dma_handle,
4729                                 u16 *queue_len)
4730 {
4731         struct gaudi_device *gaudi = hdev->asic_specific;
4732         struct gaudi_internal_qman_info *q;
4733
4734         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4735                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4736                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4737                 return NULL;
4738         }
4739
4740         q = &gaudi->internal_qmans[queue_id];
4741         *dma_handle = q->pq_dma_addr;
4742         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4743
4744         return q->pq_kernel_addr;
4745 }
4746
4747 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4748                                 u16 len, u32 timeout, u64 *result)
4749 {
4750         struct gaudi_device *gaudi = hdev->asic_specific;
4751
4752         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4753                 if (result)
4754                         *result = 0;
4755                 return 0;
4756         }
4757
4758         if (!timeout)
4759                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4760
4761         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4762                                                 timeout, result);
4763 }
4764
4765 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4766 {
4767         struct packet_msg_prot *fence_pkt;
4768         dma_addr_t pkt_dma_addr;
4769         u32 fence_val, tmp, timeout_usec;
4770         dma_addr_t fence_dma_addr;
4771         u32 *fence_ptr;
4772         int rc;
4773
4774         if (hdev->pldm)
4775                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4776         else
4777                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4778
4779         fence_val = GAUDI_QMAN0_FENCE_VAL;
4780
4781         fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4782         if (!fence_ptr) {
4783                 dev_err(hdev->dev,
4784                         "Failed to allocate memory for H/W queue %d testing\n",
4785                         hw_queue_id);
4786                 return -ENOMEM;
4787         }
4788
4789         *fence_ptr = 0;
4790
4791         fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4792                                                 &pkt_dma_addr);
4793         if (!fence_pkt) {
4794                 dev_err(hdev->dev,
4795                         "Failed to allocate packet for H/W queue %d testing\n",
4796                         hw_queue_id);
4797                 rc = -ENOMEM;
4798                 goto free_fence_ptr;
4799         }
4800
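             /*
              * Build a MSG_PROT packet that writes the fence value to the
              * scratch buffer; seeing the value there proves the QMAN fetched
              * and executed the packet from the tested queue.
              */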
4801         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4802         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4803         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4804
4805         fence_pkt->ctl = cpu_to_le32(tmp);
4806         fence_pkt->value = cpu_to_le32(fence_val);
4807         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4808
4809         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4810                                         sizeof(struct packet_msg_prot),
4811                                         pkt_dma_addr);
4812         if (rc) {
4813                 dev_err(hdev->dev,
4814                         "Failed to send fence packet to H/W queue %d\n",
4815                         hw_queue_id);
4816                 goto free_pkt;
4817         }
4818
4819         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4820                                         1000, timeout_usec, true);
4821
4822         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4823
4824         if (rc == -ETIMEDOUT) {
4825                 dev_err(hdev->dev,
4826                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4827                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4828                 rc = -EIO;
4829         }
4830
4831 free_pkt:
4832         hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4833 free_fence_ptr:
4834         hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4835         return rc;
4836 }
4837
4838 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4839 {
4840         struct gaudi_device *gaudi = hdev->asic_specific;
4841
4842         /*
4843          * Check the capability here, as send_cpu_message() won't update the
4844          * result value if the capability is not set
4845          */
4846         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4847                 return 0;
4848
4849         return hl_fw_test_cpu_queue(hdev);
4850 }
4851
4852 static int gaudi_test_queues(struct hl_device *hdev)
4853 {
4854         int i, rc, ret_val = 0;
4855
4856         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4857                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4858                         rc = gaudi_test_queue(hdev, i);
4859                         if (rc)
4860                                 ret_val = -EINVAL;
4861                 }
4862         }
4863
4864         rc = gaudi_test_cpu_queue(hdev);
4865         if (rc)
4866                 ret_val = -EINVAL;
4867
4868         return ret_val;
4869 }
4870
4871 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4872                 gfp_t mem_flags, dma_addr_t *dma_handle)
4873 {
4874         void *kernel_addr;
4875
4876         if (size > GAUDI_DMA_POOL_BLK_SIZE)
4877                 return NULL;
4878
4879         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4880
4881         /* Shift to the device's base physical address of host memory */
4882         if (kernel_addr)
4883                 *dma_handle += HOST_PHYS_BASE;
4884
4885         return kernel_addr;
4886 }
4887
4888 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4889                         dma_addr_t dma_addr)
4890 {
4891         /* Cancel the device's base physical address of host memory */
4892         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4893
4894         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4895 }
4896
4897 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4898                                         size_t size, dma_addr_t *dma_handle)
4899 {
4900         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4901 }
4902
4903 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4904                                                 size_t size, void *vaddr)
4905 {
4906         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4907 }
4908
4909 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4910 {
4911         struct scatterlist *sg, *sg_next_iter;
4912         u32 count, dma_desc_cnt;
4913         u64 len, len_next;
4914         dma_addr_t addr, addr_next;
4915
4916         dma_desc_cnt = 0;
4917
4918         for_each_sgtable_dma_sg(sgt, sg, count) {
4919                 len = sg_dma_len(sg);
4920                 addr = sg_dma_address(sg);
4921
4922                 if (len == 0)
4923                         break;
4924
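                     /*
                      * Merge DMA-contiguous entries as long as the combined
                      * size still fits in one LIN_DMA transfer; each merged
                      * run then costs a single descriptor.
                      */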
4925                 while ((count + 1) < sgt->nents) {
4926                         sg_next_iter = sg_next(sg);
4927                         len_next = sg_dma_len(sg_next_iter);
4928                         addr_next = sg_dma_address(sg_next_iter);
4929
4930                         if (len_next == 0)
4931                                 break;
4932
4933                         if ((addr + len == addr_next) &&
4934                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4935                                 len += len_next;
4936                                 count++;
4937                                 sg = sg_next_iter;
4938                         } else {
4939                                 break;
4940                         }
4941                 }
4942
4943                 dma_desc_cnt++;
4944         }
4945
4946         return dma_desc_cnt * sizeof(struct packet_lin_dma);
4947 }
4948
4949 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4950                                 struct hl_cs_parser *parser,
4951                                 struct packet_lin_dma *user_dma_pkt,
4952                                 u64 addr, enum dma_data_direction dir)
4953 {
4954         struct hl_userptr *userptr;
4955         int rc;
4956
4957         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4958                         parser->job_userptr_list, &userptr))
4959                 goto already_pinned;
4960
4961         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4962         if (!userptr)
4963                 return -ENOMEM;
4964
4965         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4966                                 userptr);
4967         if (rc)
4968                 goto free_userptr;
4969
4970         list_add_tail(&userptr->job_node, parser->job_userptr_list);
4971
4972         rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
4973         if (rc) {
4974                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4975                 goto unpin_memory;
4976         }
4977
4978         userptr->dma_mapped = true;
4979         userptr->dir = dir;
4980
4981 already_pinned:
4982         parser->patched_cb_size +=
4983                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4984
4985         return 0;
4986
4987 unpin_memory:
4988         list_del(&userptr->job_node);
4989         hl_unpin_host_memory(hdev, userptr);
4990 free_userptr:
4991         kfree(userptr);
4992         return rc;
4993 }
4994
4995 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4996                                 struct hl_cs_parser *parser,
4997                                 struct packet_lin_dma *user_dma_pkt,
4998                                 bool src_in_host)
4999 {
5000         enum dma_data_direction dir;
5001         bool skip_host_mem_pin = false, user_memset;
5002         u64 addr;
5003         int rc = 0;
5004
5005         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5006                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5007                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5008
5009         if (src_in_host) {
5010                 if (user_memset)
5011                         skip_host_mem_pin = true;
5012
5013                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5014                 dir = DMA_TO_DEVICE;
5015                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5016         } else {
5017                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5018                 dir = DMA_FROM_DEVICE;
5019                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5020                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5021                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5022         }
5023
5024         if (skip_host_mem_pin)
5025                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5026         else
5027                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5028                                                 addr, dir);
5029
5030         return rc;
5031 }
5032
5033 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5034                                 struct hl_cs_parser *parser,
5035                                 struct packet_lin_dma *user_dma_pkt)
5036 {
5037         bool src_in_host = false;
5038         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5039                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5040                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5041
5042         dev_dbg(hdev->dev, "DMA packet details:\n");
5043         dev_dbg(hdev->dev, "source == 0x%llx\n",
5044                                 le64_to_cpu(user_dma_pkt->src_addr));
5045         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5046         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5047
5048         /*
5049          * Special handling for DMA with size 0. Bypass all validations
5050          * because no transactions will be done except for WR_COMP, which
5051          * is not a security issue
5052          */
5053         if (!le32_to_cpu(user_dma_pkt->tsize)) {
5054                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5055                 return 0;
5056         }
5057
5058         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5059                 src_in_host = true;
5060
5061         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5062                                                 src_in_host);
5063 }
5064
5065 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5066                                         struct hl_cs_parser *parser,
5067                                         struct packet_load_and_exe *user_pkt)
5068 {
5069         u32 cfg;
5070
5071         cfg = le32_to_cpu(user_pkt->cfg);
5072
5073         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5074                 dev_err(hdev->dev,
5075                         "User not allowed to use Load and Execute\n");
5076                 return -EPERM;
5077         }
5078
5079         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5080
5081         return 0;
5082 }
5083
5084 static int gaudi_validate_cb(struct hl_device *hdev,
5085                         struct hl_cs_parser *parser, bool is_mmu)
5086 {
5087         u32 cb_parsed_length = 0;
5088         int rc = 0;
5089
5090         parser->patched_cb_size = 0;
5091
5092         /* user_cb_size is more than 0 so the loop will always be executed */
5093         while (cb_parsed_length < parser->user_cb_size) {
5094                 enum packet_id pkt_id;
5095                 u16 pkt_size;
5096                 struct gaudi_packet *user_pkt;
5097
5098                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5099
5100                 pkt_id = (enum packet_id) (
5101                                 (le64_to_cpu(user_pkt->header) &
5102                                 PACKET_HEADER_PACKET_ID_MASK) >>
5103                                         PACKET_HEADER_PACKET_ID_SHIFT);
5104
5105                 if (!validate_packet_id(pkt_id)) {
5106                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5107                         rc = -EINVAL;
5108                         break;
5109                 }
5110
5111                 pkt_size = gaudi_packet_sizes[pkt_id];
5112                 cb_parsed_length += pkt_size;
5113                 if (cb_parsed_length > parser->user_cb_size) {
5114                         dev_err(hdev->dev,
5115                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5116                         rc = -EINVAL;
5117                         break;
5118                 }
5119
5120                 switch (pkt_id) {
5121                 case PACKET_MSG_PROT:
5122                         dev_err(hdev->dev,
5123                                 "User not allowed to use MSG_PROT\n");
5124                         rc = -EPERM;
5125                         break;
5126
5127                 case PACKET_CP_DMA:
5128                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5129                         rc = -EPERM;
5130                         break;
5131
5132                 case PACKET_STOP:
5133                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5134                         rc = -EPERM;
5135                         break;
5136
5137                 case PACKET_WREG_BULK:
5138                         dev_err(hdev->dev,
5139                                 "User not allowed to use WREG_BULK\n");
5140                         rc = -EPERM;
5141                         break;
5142
5143                 case PACKET_LOAD_AND_EXE:
5144                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5145                                 (struct packet_load_and_exe *) user_pkt);
5146                         break;
5147
5148                 case PACKET_LIN_DMA:
5149                         parser->contains_dma_pkt = true;
5150                         if (is_mmu)
5151                                 parser->patched_cb_size += pkt_size;
5152                         else
5153                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5154                                         (struct packet_lin_dma *) user_pkt);
5155                         break;
5156
5157                 case PACKET_WREG_32:
5158                 case PACKET_MSG_LONG:
5159                 case PACKET_MSG_SHORT:
5160                 case PACKET_REPEAT:
5161                 case PACKET_FENCE:
5162                 case PACKET_NOP:
5163                 case PACKET_ARB_POINT:
5164                         parser->patched_cb_size += pkt_size;
5165                         break;
5166
5167                 default:
5168                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5169                                 pkt_id);
5170                         rc = -EINVAL;
5171                         break;
5172                 }
5173
5174                 if (rc)
5175                         break;
5176         }
5177
5178         /*
5179          * The new CB should have space at the end for:
5180          * 1. Optional NOP padding for cacheline alignment
5181          * 2. An MSG_PROT packet that will act as a completion packet
5182          * 3. An MSG_PROT packet that will generate the MSI interrupt
5183          */
5184         if (parser->completion)
5185                 parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5186                         parser->patched_cb_size);
5187
5188         return rc;
5189 }
5190
5191 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5192                                 struct hl_cs_parser *parser,
5193                                 struct packet_lin_dma *user_dma_pkt,
5194                                 struct packet_lin_dma *new_dma_pkt,
5195                                 u32 *new_dma_pkt_size)
5196 {
5197         struct hl_userptr *userptr;
5198         struct scatterlist *sg, *sg_next_iter;
5199         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5200         u64 len, len_next;
5201         dma_addr_t dma_addr, dma_addr_next;
5202         u64 device_memory_addr, addr;
5203         enum dma_data_direction dir;
5204         struct sg_table *sgt;
5205         bool src_in_host = false;
5206         bool skip_host_mem_pin = false;
5207         bool user_memset;
5208
5209         ctl = le32_to_cpu(user_dma_pkt->ctl);
5210
5211         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5212                 src_in_host = true;
5213
5214         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5215                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5216
5217         if (src_in_host) {
5218                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5219                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5220                 dir = DMA_TO_DEVICE;
5221                 if (user_memset)
5222                         skip_host_mem_pin = true;
5223         } else {
5224                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5225                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5226                 dir = DMA_FROM_DEVICE;
5227         }
5228
5229         if ((!skip_host_mem_pin) &&
5230                 (!hl_userptr_is_pinned(hdev, addr,
5231                                         le32_to_cpu(user_dma_pkt->tsize),
5232                                         parser->job_userptr_list, &userptr))) {
5233                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5234                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5235                 return -EFAULT;
5236         }
5237
5238         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5239                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5240                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5241                 return 0;
5242         }
5243
5244         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5245
5246         sgt = userptr->sgt;
5247         dma_desc_cnt = 0;
5248
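        /*
         * Walk the pinned host buffer's SG list and emit one LIN_DMA packet
         * per run of physically-contiguous DMA addresses, merging neighbouring
         * entries as long as the combined size fits in DMA_MAX_TRANSFER_SIZE.
         */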
5249         for_each_sgtable_dma_sg(sgt, sg, count) {
5250                 len = sg_dma_len(sg);
5251                 dma_addr = sg_dma_address(sg);
5252
5253                 if (len == 0)
5254                         break;
5255
5256                 while ((count + 1) < sgt->nents) {
5257                         sg_next_iter = sg_next(sg);
5258                         len_next = sg_dma_len(sg_next_iter);
5259                         dma_addr_next = sg_dma_address(sg_next_iter);
5260
5261                         if (len_next == 0)
5262                                 break;
5263
5264                         if ((dma_addr + len == dma_addr_next) &&
5265                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5266                                 len += len_next;
5267                                 count++;
5268                                 sg = sg_next_iter;
5269                         } else {
5270                                 break;
5271                         }
5272                 }
5273
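                /*
                 * Keep the engine-barrier (EB) bit only on the first generated
                 * packet and drop WR_COMP on all of them; the user's WR_COMP
                 * setting is restored on the last packet below.
                 */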
5274                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5275                 if (likely(dma_desc_cnt))
5276                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5277                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5278                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5279                 new_dma_pkt->tsize = cpu_to_le32(len);
5280
5281                 if (dir == DMA_TO_DEVICE) {
5282                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5283                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5284                 } else {
5285                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5286                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5287                 }
5288
5289                 if (!user_memset)
5290                         device_memory_addr += len;
5291                 dma_desc_cnt++;
5292                 new_dma_pkt++;
5293         }
5294
5295         if (!dma_desc_cnt) {
5296                 dev_err(hdev->dev,
5297                         "No SG entries found when patching DMA packet\n");
5298                 return -EFAULT;
5299         }
5300
5301         /* Fix the last dma packet - wrcomp must be as user set it */
5302         new_dma_pkt--;
5303         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5304
5305         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5306
5307         return 0;
5308 }
5309
5310 static int gaudi_patch_cb(struct hl_device *hdev,
5311                                 struct hl_cs_parser *parser)
5312 {
5313         u32 cb_parsed_length = 0;
5314         u32 cb_patched_cur_length = 0;
5315         int rc = 0;
5316
5317         /* user_cb_size is more than 0 so the loop will always be executed */
5318         while (cb_parsed_length < parser->user_cb_size) {
5319                 enum packet_id pkt_id;
5320                 u16 pkt_size;
5321                 u32 new_pkt_size = 0;
5322                 struct gaudi_packet *user_pkt, *kernel_pkt;
5323
5324                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5325                 kernel_pkt = parser->patched_cb->kernel_address +
5326                                         cb_patched_cur_length;
5327
5328                 pkt_id = (enum packet_id) (
5329                                 (le64_to_cpu(user_pkt->header) &
5330                                 PACKET_HEADER_PACKET_ID_MASK) >>
5331                                         PACKET_HEADER_PACKET_ID_SHIFT);
5332
5333                 if (!validate_packet_id(pkt_id)) {
5334                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5335                         rc = -EINVAL;
5336                         break;
5337                 }
5338
5339                 pkt_size = gaudi_packet_sizes[pkt_id];
5340                 cb_parsed_length += pkt_size;
5341                 if (cb_parsed_length > parser->user_cb_size) {
5342                         dev_err(hdev->dev,
5343                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5344                         rc = -EINVAL;
5345                         break;
5346                 }
5347
5348                 switch (pkt_id) {
5349                 case PACKET_LIN_DMA:
5350                         rc = gaudi_patch_dma_packet(hdev, parser,
5351                                         (struct packet_lin_dma *) user_pkt,
5352                                         (struct packet_lin_dma *) kernel_pkt,
5353                                         &new_pkt_size);
5354                         cb_patched_cur_length += new_pkt_size;
5355                         break;
5356
5357                 case PACKET_MSG_PROT:
5358                         dev_err(hdev->dev,
5359                                 "User not allowed to use MSG_PROT\n");
5360                         rc = -EPERM;
5361                         break;
5362
5363                 case PACKET_CP_DMA:
5364                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5365                         rc = -EPERM;
5366                         break;
5367
5368                 case PACKET_STOP:
5369                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5370                         rc = -EPERM;
5371                         break;
5372
5373                 case PACKET_WREG_32:
5374                 case PACKET_WREG_BULK:
5375                 case PACKET_MSG_LONG:
5376                 case PACKET_MSG_SHORT:
5377                 case PACKET_REPEAT:
5378                 case PACKET_FENCE:
5379                 case PACKET_NOP:
5380                 case PACKET_ARB_POINT:
5381                 case PACKET_LOAD_AND_EXE:
5382                         memcpy(kernel_pkt, user_pkt, pkt_size);
5383                         cb_patched_cur_length += pkt_size;
5384                         break;
5385
5386                 default:
5387                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5388                                 pkt_id);
5389                         rc = -EINVAL;
5390                         break;
5391                 }
5392
5393                 if (rc)
5394                         break;
5395         }
5396
5397         return rc;
5398 }
5399
5400 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5401                 struct hl_cs_parser *parser)
5402 {
5403         u64 handle;
5404         u32 patched_cb_size;
5405         struct hl_cb *user_cb;
5406         int rc;
5407
5408         /*
5409          * The new CB should have space at the end for:
5410          * 1. Optional NOP padding for cacheline alignment
5411          * 2. An MSG_PROT packet that will act as a completion packet
5412          * 3. An MSG_PROT packet that will generate the MSI interrupt
5413          */
5414         if (parser->completion)
5415                 parser->patched_cb_size = parser->user_cb_size +
5416                                 gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5417         else
5418                 parser->patched_cb_size = parser->user_cb_size;
5419
5420         rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5421                                 parser->patched_cb_size, false, false,
5422                                 &handle);
5423
5424         if (rc) {
5425                 dev_err(hdev->dev,
5426                         "Failed to allocate patched CB for DMA CS %d\n",
5427                         rc);
5428                 return rc;
5429         }
5430
5431         parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5432         /* hl_cb_get should never fail */
5433         if (!parser->patched_cb) {
5434                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5435                 rc = -EFAULT;
5436                 goto out;
5437         }
5438
5439         /*
5440          * We are protected from overflow because the check
5441          * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5442          * in the common code. That check is done only if is_kernel_allocated_cb is true.
5443          *
5444          * There is no option to reach here without going through that check because:
5445          * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5446          *    an external queue.
5447          * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5448          */
5449         memcpy(parser->patched_cb->kernel_address,
5450                 parser->user_cb->kernel_address,
5451                 parser->user_cb_size);
5452
5453         patched_cb_size = parser->patched_cb_size;
5454
5455         /* Validate patched CB instead of user CB */
5456         user_cb = parser->user_cb;
5457         parser->user_cb = parser->patched_cb;
5458         rc = gaudi_validate_cb(hdev, parser, true);
5459         parser->user_cb = user_cb;
5460
5461         if (rc) {
5462                 hl_cb_put(parser->patched_cb);
5463                 goto out;
5464         }
5465
5466         if (patched_cb_size != parser->patched_cb_size) {
5467                 dev_err(hdev->dev, "user CB size mismatch\n");
5468                 hl_cb_put(parser->patched_cb);
5469                 rc = -EINVAL;
5470                 goto out;
5471         }
5472
5473 out:
5474         /*
5475          * Always call cb destroy here because we still hold one reference
5476          * to it from the cb_get call above. After the job is completed,
5477          * cb_put will release it, but here we want to remove its handle
5478          * from the memory manager
5479          */
5480         hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5481
5482         return rc;
5483 }
5484
5485 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5486                 struct hl_cs_parser *parser)
5487 {
5488         u64 handle;
5489         int rc;
5490
5491         rc = gaudi_validate_cb(hdev, parser, false);
5492
5493         if (rc)
5494                 goto free_userptr;
5495
5496         rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5497                                 parser->patched_cb_size, false, false,
5498                                 &handle);
5499         if (rc) {
5500                 dev_err(hdev->dev,
5501                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5502                 goto free_userptr;
5503         }
5504
5505         parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5506         /* hl_cb_get should never fail here */
5507         if (!parser->patched_cb) {
5508                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5509                 rc = -EFAULT;
5510                 goto out;
5511         }
5512
5513         rc = gaudi_patch_cb(hdev, parser);
5514
5515         if (rc)
5516                 hl_cb_put(parser->patched_cb);
5517
5518 out:
5519         /*
5520          * Always call cb destroy here because we still hold one reference
5521          * to it from the cb_get call above. After the job is completed,
5522          * cb_put will release it, but here we want to remove its handle
5523          * from the memory manager
5524          */
5525         hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5526
5527 free_userptr:
5528         if (rc)
5529                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5530         return rc;
5531 }
5532
5533 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5534                                         struct hl_cs_parser *parser)
5535 {
5536         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5537         struct gaudi_device *gaudi = hdev->asic_specific;
5538         u32 nic_queue_offset, nic_mask_q_id;
5539
5540         if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5541                         (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5542                 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5543                 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5544
5545                 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5546                         dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5547                         return -EINVAL;
5548                 }
5549         }
5550
5551         /* For internal queue jobs just check if CB address is valid */
5552         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5553                                         parser->user_cb_size,
5554                                         asic_prop->sram_user_base_address,
5555                                         asic_prop->sram_end_address))
5556                 return 0;
5557
5558         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5559                                         parser->user_cb_size,
5560                                         asic_prop->dram_user_base_address,
5561                                         asic_prop->dram_end_address))
5562                 return 0;
5563
5564         /* PMMU and HPMMU addresses are equal, check only one of them */
5565         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5566                                         parser->user_cb_size,
5567                                         asic_prop->pmmu.start_addr,
5568                                         asic_prop->pmmu.end_addr))
5569                 return 0;
5570
5571         dev_err(hdev->dev,
5572                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5573                 parser->user_cb, parser->user_cb_size);
5574
5575         return -EFAULT;
5576 }
5577
5578 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5579 {
5580         struct gaudi_device *gaudi = hdev->asic_specific;
5581
5582         if (parser->queue_type == QUEUE_TYPE_INT)
5583                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5584
5585         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5586                 return gaudi_parse_cb_mmu(hdev, parser);
5587         else
5588                 return gaudi_parse_cb_no_mmu(hdev, parser);
5589 }
5590
5591 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5592                                 u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5593                                 u32 msi_vec, bool eb)
5594 {
5595         struct gaudi_device *gaudi = hdev->asic_specific;
5596         struct packet_msg_prot *cq_pkt;
5597         struct packet_nop *cq_padding;
5598         u64 msi_addr;
5599         u32 tmp;
5600
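        /*
         * CB layout at the end: NOP padding from the original CB end up to the
         * last two packets, then an MSG_PROT that writes the completion value
         * to the CQ and an MSG_PROT that triggers the MSI.
         */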
5601         cq_padding = kernel_address + original_len;
5602         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5603
5604         while ((void *)cq_padding < (void *)cq_pkt) {
5605                 cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5606                 cq_padding++;
5607         }
5608
5609         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5610         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5611
5612         if (eb)
5613                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5614
5615         cq_pkt->ctl = cpu_to_le32(tmp);
5616         cq_pkt->value = cpu_to_le32(cq_val);
5617         cq_pkt->addr = cpu_to_le64(cq_addr);
5618
5619         cq_pkt++;
5620
5621         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5622         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5623         cq_pkt->ctl = cpu_to_le32(tmp);
5624         cq_pkt->value = cpu_to_le32(1);
5625
5626         if (gaudi->multi_msi_mode)
5627                 msi_addr = mmPCIE_MSI_INTR_0 + msi_vec * 4;
5628         else
5629                 msi_addr = mmPCIE_CORE_MSI_REQ;
5630
5631         cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5632 }
5633
5634 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5635 {
5636         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5637 }
5638
5639 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5640                                         u32 size, u64 val)
5641 {
5642         struct packet_lin_dma *lin_dma_pkt;
5643         struct hl_cs_job *job;
5644         u32 cb_size, ctl, err_cause;
5645         struct hl_cb *cb;
5646         int rc;
5647
5648         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5649         if (!cb)
5650                 return -EFAULT;
5651
5652         lin_dma_pkt = cb->kernel_address;
5653         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5654         cb_size = sizeof(*lin_dma_pkt);
5655
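        /*
         * Build a single memset-mode LIN_DMA packet: with the MEMSET bit set,
         * src_addr carries the 64-bit fill value while dst_addr/tsize describe
         * the device memory range to fill.
         */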
5656         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5657         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5658         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5659         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5660         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5661
5662         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5663         lin_dma_pkt->src_addr = cpu_to_le64(val);
5664         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5665         lin_dma_pkt->tsize = cpu_to_le32(size);
5666
5667         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5668         if (!job) {
5669                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5670                 rc = -ENOMEM;
5671                 goto release_cb;
5672         }
5673
5674         /* Verify DMA is OK */
5675         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5676         if (err_cause && !hdev->init_done) {
5677                 dev_dbg(hdev->dev,
5678                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5679                         err_cause);
5680                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5681         }
5682
5683         job->id = 0;
5684         job->user_cb = cb;
5685         atomic_inc(&job->user_cb->cs_cnt);
5686         job->user_cb_size = cb_size;
5687         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5688         job->patched_cb = job->user_cb;
5689         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5690
5691         hl_debugfs_add_job(hdev, job);
5692
5693         rc = gaudi_send_job_on_qman0(hdev, job);
5694         hl_debugfs_remove_job(hdev, job);
5695         kfree(job);
5696         atomic_dec(&cb->cs_cnt);
5697
5698         /* Verify DMA is OK */
5699         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5700         if (err_cause) {
5701                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5702                 rc = -EIO;
5703                 if (!hdev->init_done) {
5704                         dev_dbg(hdev->dev,
5705                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5706                                 err_cause);
5707                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5708                 }
5709         }
5710
5711 release_cb:
5712         hl_cb_put(cb);
5713         hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5714
5715         return rc;
5716 }
5717
5718 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5719                                         u32 num_regs, u32 val)
5720 {
5721         struct packet_msg_long *pkt;
5722         struct hl_cs_job *job;
5723         u32 cb_size, ctl;
5724         struct hl_cb *cb;
5725         int i, rc;
5726
5727         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5728
5729         if (cb_size > SZ_2M) {
5730                 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M >> 20);
5731                 return -ENOMEM;
5732         }
5733
5734         cb = hl_cb_kernel_create(hdev, cb_size, false);
5735         if (!cb)
5736                 return -EFAULT;
5737
5738         pkt = cb->kernel_address;
5739
5740         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5741         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5742         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5743         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5744         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5745
5746         for (i = 0; i < num_regs ; i++, pkt++) {
5747                 pkt->ctl = cpu_to_le32(ctl);
5748                 pkt->value = cpu_to_le32(val);
5749                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5750         }
5751
5752         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5753         if (!job) {
5754                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5755                 rc = -ENOMEM;
5756                 goto release_cb;
5757         }
5758
5759         job->id = 0;
5760         job->user_cb = cb;
5761         atomic_inc(&job->user_cb->cs_cnt);
5762         job->user_cb_size = cb_size;
5763         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5764         job->patched_cb = job->user_cb;
5765         job->job_cb_size = cb_size;
5766
5767         hl_debugfs_add_job(hdev, job);
5768
5769         rc = gaudi_send_job_on_qman0(hdev, job);
5770         hl_debugfs_remove_job(hdev, job);
5771         kfree(job);
5772         atomic_dec(&cb->cs_cnt);
5773
5774 release_cb:
5775         hl_cb_put(cb);
5776         hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5777
5778         return rc;
5779 }
5780
5781 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5782 {
5783         u64 base_addr;
5784         u32 num_regs;
5785         int rc;
5786
5787         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5788         num_regs = NUM_OF_SOB_IN_BLOCK;
5789         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5790         if (rc) {
5791                 dev_err(hdev->dev, "failed resetting SM registers");
5792                 return -ENOMEM;
5793         }
5794
5795         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5796         num_regs = NUM_OF_SOB_IN_BLOCK;
5797         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5798         if (rc) {
5799                 dev_err(hdev->dev, "failed resetting SM registers");
5800                 return -ENOMEM;
5801         }
5802
5803         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5804         num_regs = NUM_OF_SOB_IN_BLOCK;
5805         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5806         if (rc) {
5807                 dev_err(hdev->dev, "failed resetting SM registers");
5808                 return -ENOMEM;
5809         }
5810
5811         base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5812         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5813         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5814         if (rc) {
5815                 dev_err(hdev->dev, "failed resetting SM registers");
5816                 return -ENOMEM;
5817         }
5818
5819         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5820         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5821         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5822         if (rc) {
5823                 dev_err(hdev->dev, "failed resetting SM registers");
5824                 return -ENOMEM;
5825         }
5826
5827         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5828         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5829         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5830         if (rc) {
5831                 dev_err(hdev->dev, "failed resetting SM registers");
5832                 return -ENOMEM;
5833         }
5834
5835         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5836                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5837         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5838         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5839         if (rc) {
5840                 dev_err(hdev->dev, "failed resetting SM registers");
5841                 return -ENOMEM;
5842         }
5843
5844         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5845                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5846         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5847         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5848         if (rc) {
5849                 dev_err(hdev->dev, "failed resetting SM registers");
5850                 return -ENOMEM;
5851         }
5852
5853         return 0;
5854 }
5855
5856 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5857 {
5858         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5859                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5860         int i;
5861
5862         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5863                 u64 sob_addr = CFG_BASE +
5864                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5865                                 (i * sob_delta);
5866                 u32 dma_offset = i * DMA_CORE_OFFSET;
5867
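                /*
                 * Point each DMA channel's write-completion message back at
                 * its dedicated sync object and restore the completion write
                 * data.
                 */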
5868                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5869                                 lower_32_bits(sob_addr));
5870                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5871                                 upper_32_bits(sob_addr));
5872                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5873
5874                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5875                  * modified by the user for SRAM reduction
5876                  */
5877                 if (i > 1)
5878                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5879                                                                 0x00000001);
5880         }
5881 }
5882
5883 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5884 {
5885         u32 qman_offset;
5886         int i;
5887
5888         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5889                 qman_offset = i * DMA_QMAN_OFFSET;
5890                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5891         }
5892
5893         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5894                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5895                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5896         }
5897
5898         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5899                 qman_offset = i * TPC_QMAN_OFFSET;
5900                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5901         }
5902
5903         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5904                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5905                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5906                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5907         }
5908 }
5909
5910 static int gaudi_restore_user_registers(struct hl_device *hdev)
5911 {
5912         int rc;
5913
5914         rc = gaudi_restore_sm_registers(hdev);
5915         if (rc)
5916                 return rc;
5917
5918         gaudi_restore_dma_registers(hdev);
5919         gaudi_restore_qm_registers(hdev);
5920
5921         return 0;
5922 }
5923
5924 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5925 {
5926         return 0;
5927 }
5928
5929 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5930 {
5931         u32 size = hdev->asic_prop.mmu_pgt_size +
5932                         hdev->asic_prop.mmu_cache_mng_size;
5933         struct gaudi_device *gaudi = hdev->asic_specific;
5934         u64 addr = hdev->asic_prop.mmu_pgt_addr;
5935
5936         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5937                 return 0;
5938
5939         return gaudi_memset_device_memory(hdev, addr, size, 0);
5940 }
5941
5942 static void gaudi_restore_phase_topology(struct hl_device *hdev)
5943 {
5944
5945 }
5946
5947 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5948                                         u32 size_to_dma, dma_addr_t dma_addr)
5949 {
5950         u32 err_cause, val;
5951         u64 dma_offset;
5952         int rc;
5953
5954         dma_offset = dma_id * DMA_CORE_OFFSET;
5955
5956         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5957         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5958         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5959         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5960         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5961         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5962                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5963
5964         rc = hl_poll_timeout(
5965                 hdev,
5966                 mmDMA0_CORE_STS0 + dma_offset,
5967                 val,
5968                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5969                 0,
5970                 1000000);
5971
5972         if (rc) {
5973                 dev_err(hdev->dev,
5974                         "DMA %d timed out while reading from 0x%llx\n",
5975                         dma_id, addr);
5976                 return -EIO;
5977         }
5978
5979         /* Verify DMA is OK */
5980         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5981         if (err_cause) {
5982                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5983                 dev_dbg(hdev->dev,
5984                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5985                         err_cause);
5986                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5987
5988                 return -EIO;
5989         }
5990
5991         return 0;
5992 }
5993
5994 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
5995                                 void *blob_addr)
5996 {
5997         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
5998         u32 qm_glbl_sts0, qm_cgm_sts;
5999         u64 dma_offset, qm_offset;
6000         dma_addr_t dma_addr;
6001         void *kernel_addr;
6002         bool is_eng_idle;
6003         int rc = 0, dma_id;
6004
6005         kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
6006
6007         if (!kernel_addr)
6008                 return -ENOMEM;
6009
6010         hdev->asic_funcs->hw_queues_lock(hdev);
6011
6012         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6013         dma_offset = dma_id * DMA_CORE_OFFSET;
6014         qm_offset = dma_id * DMA_QMAN_OFFSET;
6015         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6016         qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6017         qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6018         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6019                       IS_DMA_IDLE(dma_core_sts0);
6020
6021         if (!is_eng_idle) {
6022                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6023                 dma_offset = dma_id * DMA_CORE_OFFSET;
6024                 qm_offset = dma_id * DMA_QMAN_OFFSET;
6025                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6026                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
6027                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
6028                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6029                               IS_DMA_IDLE(dma_core_sts0);
6030
6031                 if (!is_eng_idle) {
6032                         dev_err_ratelimited(hdev->dev,
6033                                 "Can't read via DMA because it is BUSY\n");
6034                         rc = -EAGAIN;
6035                         goto out;
6036                 }
6037         }
6038
6039         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6040         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6041                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6042
6043         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6044          * using the compute ctx ASID, if exists. If not, use the kernel ctx
6045          * ASID
6046          */
6047         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6048
6049         /* Verify DMA is OK */
6050         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6051         if (err_cause) {
6052                 dev_dbg(hdev->dev,
6053                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6054                         err_cause);
6055                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6056         }
6057
6058         pos = 0;
6059         size_left = size;
6060         size_to_dma = SZ_2M;
6061
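        /*
         * Read the requested device range in SZ_2M chunks through the DMA
         * bounce buffer, shrinking the final transfer to the remaining size.
         */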
6062         while (size_left > 0) {
6063
6064                 if (size_left < SZ_2M)
6065                         size_to_dma = size_left;
6066
6067                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6068                                                 dma_addr);
6069                 if (rc)
6070                         break;
6071
6072                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6073
6074                 if (size_left <= SZ_2M)
6075                         break;
6076
6077                 pos += SZ_2M;
6078                 addr += SZ_2M;
6079                 size_left -= SZ_2M;
6080         }
6081
6082         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6083          * using the compute ctx ASID, if exists. If not, use the kernel ctx
6084          * ASID
6085          */
6086         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6087                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6088
6089         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6090
6091 out:
6092         hdev->asic_funcs->hw_queues_unlock(hdev);
6093
6094         hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6095
6096         return rc;
6097 }
6098
6099 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6100 {
6101         struct gaudi_device *gaudi = hdev->asic_specific;
6102
6103         if (hdev->reset_info.hard_reset_pending)
6104                 return U64_MAX;
6105
6106         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6107                         (addr - gaudi->hbm_bar_cur_addr));
6108 }
6109
6110 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6111 {
6112         struct gaudi_device *gaudi = hdev->asic_specific;
6113
6114         if (hdev->reset_info.hard_reset_pending)
6115                 return;
6116
6117         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6118                         (addr - gaudi->hbm_bar_cur_addr));
6119 }
6120
6121 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6122 {
6123         /* mask to zero the MMBP and ASID bits */
6124         WREG32_AND(reg, ~0x7FF);
6125         WREG32_OR(reg, asid);
6126 }
6127
6128 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6129 {
6130         struct gaudi_device *gaudi = hdev->asic_specific;
6131
6132         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6133                 return;
6134
6135         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6136                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6137                 return;
6138         }
6139
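        /*
         * Program the given ASID (with MMU-bypass cleared) into the AxUSER /
         * NON_SECURE_PROPS registers of every engine, so all of their
         * transactions are translated in the context of this ASID.
         */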
6140         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6141         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6142         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6143         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6144         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6145
6146         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6147         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6148         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6149         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6150         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6151
6152         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6153         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6154         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6155         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6156         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6157
6158         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6159         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6160         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6161         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6162         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6163
6164         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6165         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6166         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6167         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6168         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6169
6170         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6171         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6172         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6173         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6174         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6175
6176         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6177         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6178         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6179         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6180         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6181
6182         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6183         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6184         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6185         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6186         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6187
6188         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6189         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6190         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6191         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6192         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6193         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6194         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6195         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6196
6197         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6198         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6199         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6200         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6201         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6202         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6203         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6204
6205         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6206         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6207         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6208         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6209         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6210         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6211         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6212
6213         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6214         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6215         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6216         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6217         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6218         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6219         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6220
6221         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6222         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6223         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6224         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6225         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6226         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6227         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6228
6229         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6230         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6231         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6232         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6233         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6234         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6235         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6236
6237         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6238         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6239         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6240         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6241         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6242         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6243         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6244
6245         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6246         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6247         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6248         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6249         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6250         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6251         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6252
6253         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6254         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6255         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6256         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6257         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6258         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6259         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6260
6261         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6262         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6263         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6264         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6265         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6266         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6267         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6268         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6269         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6270         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6271
6272         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6273         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6274         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6275         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6276         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6277         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6278         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6279         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6280         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6281         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6282         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6283         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6284
6285         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6286                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6287                                 asid);
6288                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6289                                 asid);
6290                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6291                                 asid);
6292                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6293                                 asid);
6294                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6295                                 asid);
6296         }
6297
6298         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6299                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6300                                 asid);
6301                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6302                                 asid);
6303                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6304                                 asid);
6305                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6306                                 asid);
6307                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6308                                 asid);
6309         }
6310
6311         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6312                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6313                                 asid);
6314                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6315                                 asid);
6316                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6317                                 asid);
6318                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6319                                 asid);
6320                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6321                                 asid);
6322         }
6323
6324         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6325                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6326                                 asid);
6327                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6328                                 asid);
6329                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6330                                 asid);
6331                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6332                                 asid);
6333                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6334                                 asid);
6335         }
6336
6337         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6338                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6339                                 asid);
6340                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6341                                 asid);
6342                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6343                                 asid);
6344                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6345                                 asid);
6346                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6347                                 asid);
6348         }
6349
6350         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6351                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6352                                 asid);
6353                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6354                                 asid);
6355                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6356                                 asid);
6357                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6358                                 asid);
6359                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6360                                 asid);
6361         }
6362
6363         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6364                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6365                                 asid);
6366                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6367                                 asid);
6368                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6369                                 asid);
6370                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6371                                 asid);
6372                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6373                                 asid);
6374         }
6375
6376         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6377                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6378                                 asid);
6379                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6380                                 asid);
6381                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6382                                 asid);
6383                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6384                                 asid);
6385                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6386                                 asid);
6387         }
6388
6389         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6390                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6391                                 asid);
6392                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6393                                 asid);
6394                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6395                                 asid);
6396                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6397                                 asid);
6398                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6399                                 asid);
6400         }
6401
6402         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6403                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6404                                 asid);
6405                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6406                                 asid);
6407                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6408                                 asid);
6409                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6410                                 asid);
6411                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6412                                 asid);
6413         }
6414
6415         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6416         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6417 }
6418
6419 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6420                 struct hl_cs_job *job)
6421 {
6422         struct packet_msg_prot *fence_pkt;
6423         u32 *fence_ptr;
6424         dma_addr_t fence_dma_addr;
6425         struct hl_cb *cb;
6426         u32 tmp, timeout, dma_offset;
6427         int rc;
6428
6429         if (hdev->pldm)
6430                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6431         else
6432                 timeout = HL_DEVICE_TIMEOUT_USEC;
6433
6434         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6435                 dev_err_ratelimited(hdev->dev,
6436                         "Can't send driver job on QMAN0 because the device is not idle\n");
6437                 return -EBUSY;
6438         }
6439
6440         fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6441         if (!fence_ptr) {
6442                 dev_err(hdev->dev,
6443                         "Failed to allocate fence memory for QMAN0\n");
6444                 return -ENOMEM;
6445         }
6446
6447         cb = job->patched_cb;
6448
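        /*
         * Build a MSG_PROT packet at the tail of the patched CB; when QMAN0
         * executes it, the fence value is written to host memory and is
         * polled for below to detect job completion.
         */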
6449         fence_pkt = cb->kernel_address +
6450                         job->job_cb_size - sizeof(struct packet_msg_prot);
6451
6452         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6453         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6454         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6455
6456         fence_pkt->ctl = cpu_to_le32(tmp);
6457         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6458         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6459
6460         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6461
6462         WREG32(mmDMA0_CORE_PROT + dma_offset,
6463                         BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6464
6465         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6466                                         job->job_cb_size, cb->bus_address);
6467         if (rc) {
6468                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6469                 goto free_fence_ptr;
6470         }
6471
6472         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6473                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6474                                 timeout, true);
6475
6476         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6477
6478         if (rc == -ETIMEDOUT) {
6479                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6480                 goto free_fence_ptr;
6481         }
6482
6483 free_fence_ptr:
6484         WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6485
6486         hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6487         return rc;
6488 }
6489
6490 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6491 {
6492         if (event_type >= GAUDI_EVENT_SIZE)
6493                 goto event_not_supported;
6494
6495         if (!gaudi_irq_map_table[event_type].valid)
6496                 goto event_not_supported;
6497
6498                 snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6499
6500         return;
6501
6502 event_not_supported:
6503         snprintf(desc, size, "N/A");
6504 }
6505
6506 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6507                                                         bool is_write, s32 *engine_id_1,
6508                                                         s32 *engine_id_2)
6509 {
6510         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6511
6512         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6513                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6514
6515         switch (x_y) {
6516         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6517         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6518                 dma_id[0] = 0;
6519                 dma_id[1] = 2;
6520                 break;
6521         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6522         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6523                 dma_id[0] = 1;
6524                 dma_id[1] = 3;
6525                 break;
6526         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6527         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6528                 dma_id[0] = 4;
6529                 dma_id[1] = 6;
6530                 break;
6531         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6532         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6533                 dma_id[0] = 5;
6534                 dma_id[1] = 7;
6535                 break;
6536         default:
6537                 goto unknown_initiator;
6538         }
6539
6540         for (i = 0 ; i < 2 ; i++) {
6541                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6542                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6543         }
6544
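        /*
         * Each DMA_IF location is shared by two DMA cores (the dma_id pairs
         * above), so use each core's ERR_CAUSE bits to decide which of the
         * two (or both) triggered the RAZWI.
         */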
6545         switch (x_y) {
6546         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6547         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6548                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6549                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6550                         return "DMA0";
6551                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6552                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6553                         return "DMA2";
6554                 } else {
6555                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6556                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6557                         return "DMA0 or DMA2";
6558                 }
6559         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6560         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6561                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6562                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6563                         return "DMA1";
6564                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6565                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6566                         return "DMA3";
6567                 } else {
6568                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6569                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6570                         return "DMA1 or DMA3";
6571                 }
6572         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6573         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6574                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6575                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6576                         return "DMA4";
6577                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6578                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6579                         return "DMA6";
6580                 } else {
6581                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6582                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6583                         return "DMA4 or DMA6";
6584                 }
6585         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6586         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6587                 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6588                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6589                         return "DMA5";
6590                 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6591                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6592                         return "DMA7";
6593                 } else {
6594                         *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6595                         *engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6596                         return "DMA5 or DMA7";
6597                 }
6598         }
6599
6600 unknown_initiator:
6601         return "unknown initiator";
6602 }
6603
6604 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6605                                                         u32 *engine_id_1, u32 *engine_id_2)
6606 {
6607         u32 val, x_y, axi_id;
6608
6609         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6610                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
6611         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6612                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6613         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6614                         RAZWI_INITIATOR_AXI_ID_SHIFT);
6615
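        /*
         * The X/Y value identifies the initiator's location on the chip;
         * where several engines share a location (e.g. TPC/NIC pairs), the
         * AXI ID is used below to tell them apart.
         */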
6616         switch (x_y) {
6617         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6618                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6619                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6620                         return "TPC0";
6621                 }
6622                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6623                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6624                         return "NIC0";
6625                 }
6626                 break;
6627         case RAZWI_INITIATOR_ID_X_Y_TPC1:
6628                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6629                 return "TPC1";
6630         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6631         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6632                 *engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6633                 return "MME0";
6634         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6635         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6636                 *engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6637                 return "MME1";
6638         case RAZWI_INITIATOR_ID_X_Y_TPC2:
6639                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6640                 return "TPC2";
6641         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6642                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6643                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6644                         return "TPC3";
6645                 }
6646                 /* PCI, CPU and PSOC do not have an engine id */
6647                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6648                         return "PCI";
6649                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6650                         return "CPU";
6651                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6652                         return "PSOC";
6653                 break;
6654         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6655         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6656         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6657         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6658         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6659         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6660         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6661         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6662                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6663                                 engine_id_1, engine_id_2);
6664         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6665                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6666                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6667                         return "TPC4";
6668                 }
6669                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6670                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6671                         return "NIC1";
6672                 }
6673                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6674                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6675                         return "NIC2";
6676                 }
6677                 break;
6678         case RAZWI_INITIATOR_ID_X_Y_TPC5:
6679                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6680                 return "TPC5";
6681         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6682         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6683                 *engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6684                 return "MME2";
6685         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6686         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6687                 *engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6688                 return "MME3";
6689         case RAZWI_INITIATOR_ID_X_Y_TPC6:
6690                 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6691                 return "TPC6";
6692         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6693                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6694                         *engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6695                         return "TPC7";
6696                 }
6697                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6698                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6699                         return "NIC4";
6700                 }
6701                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6702                         *engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6703                         return "NIC5";
6704                 }
6705                 break;
6706         default:
6707                 break;
6708         }
6709
6710         dev_err(hdev->dev,
6711                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6712                 val,
6713                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6714                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6715                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6716                         RAZWI_INITIATOR_AXI_ID_MASK);
6717
6718         return "unknown initiator";
6719 }
6720
6721 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_id_1,
6722                                                 u32 *engine_id_2)
6723 {
6724
6725         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6726                 dev_err_ratelimited(hdev->dev,
6727                         "RAZWI event caused by illegal write of %s\n",
6728                         gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6729                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6730         }
6731
6732         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6733                 dev_err_ratelimited(hdev->dev,
6734                         "RAZWI event caused by illegal read of %s\n",
6735                         gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6736                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6737         }
6738 }
6739
6740 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u8 *type)
6741 {
6742         struct gaudi_device *gaudi = hdev->asic_specific;
6743         u32 val;
6744
6745         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6746                 return;
6747
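        /*
         * The capture register holds the valid bit and VA bits 49:32; the
         * low 32 bits of the faulting VA are read from the companion
         * *_CAPTURE_VA register.
         */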
6748         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6749         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6750                 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6751                 *addr <<= 32;
6752                 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6753
6754                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6755                 *type = HL_RAZWI_PAGE_FAULT;
6756
6757                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6758         }
6759
6760         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6761         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6762                 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6763                 *addr <<= 32;
6764                 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6765
6766                 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6767                 *type = HL_RAZWI_MMU_ACCESS_ERROR;
6768
6769                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6770         }
6771 }
6772
6773 /*
6774  *  +-------------------+------------------------------------------------------+
6775  *  | Configuration Reg |                     Description                      |
6776  *  |      Address      |                                                      |
6777  *  +-------------------+------------------------------------------------------+
6778  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6779  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6780  *  |                   |0xF34 memory wrappers 63:32                           |
6781  *  |                   |0xF38 memory wrappers 95:64                           |
6782  *  |                   |0xF3C memory wrappers 127:96                          |
6783  *  +-------------------+------------------------------------------------------+
6784  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
6785  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6786  *  |                   |0xF44 memory wrappers 63:32                           |
6787  *  |                   |0xF48 memory wrappers 95:64                           |
6788  *  |                   |0xF4C memory wrappers 127:96                          |
6789  *  +-------------------+------------------------------------------------------+
6790  */
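/*
 * For example, a block with 90 memory wrappers uses three indication
 * registers (ceil(90 / 32)); a single error reported by wrapper 40 would set
 * bit 8 of the second register, giving memory_wrapper_idx = 40 in the lookup
 * below.
 */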
6791 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6792                 struct ecc_info_extract_params *params, u64 *ecc_address,
6793                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6794 {
6795         u32 i, num_mem_regs, reg, err_bit;
6796         u64 err_addr, err_word = 0;
6797
6798         num_mem_regs = params->num_memories / 32 +
6799                         ((params->num_memories % 32) ? 1 : 0);
6800
6801         if (params->block_address >= CFG_BASE)
6802                 params->block_address -= CFG_BASE;
6803
6804         if (params->derr)
6805                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6806         else
6807                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6808
6809         /* Set invalid wrapper index */
6810         *memory_wrapper_idx = 0xFF;
6811
6812         /* Iterate through memory wrappers, a single bit must be set */
6813         for (i = 0 ; i < num_mem_regs ; i++) {
6814                 err_addr += i * 4;
6815                 err_word = RREG32(err_addr);
6816                 if (err_word) {
6817                         err_bit = __ffs(err_word);
6818                         *memory_wrapper_idx = err_bit + (32 * i);
6819                         break;
6820                 }
6821         }
6822
6823         if (*memory_wrapper_idx == 0xFF) {
6824                 dev_err(hdev->dev, "ECC error information cannot be found\n");
6825                 return -EINVAL;
6826         }
6827
6828         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6829                         *memory_wrapper_idx);
6830
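        /*
         * Selecting the wrapper via MEM_SEL exposes that wrapper's failing
         * address and syndrome in the ADDRESS/SYNDROME registers read below.
         */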
6831         *ecc_address =
6832                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6833         *ecc_syndrom =
6834                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6835
6836         /* Clear error indication */
6837         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6838         if (params->derr)
6839                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6840         else
6841                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6842
6843         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6844
6845         return 0;
6846 }
6847
6848 /*
6849  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6850  *
6851  * @idx: the current pi/ci value
6852  * @q_len: the queue length (power of 2)
6853  *
6854  * @return the cyclically decremented index
6855  */
6856 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6857 {
6858         u32 mask = q_len - 1;
6859
6860         /*
6861          * modular decrement is equivalent to adding (q_len - 1);
6862          * we then take the LSBs to make sure the value stays in the
6863          * range [0, q_len - 1]
6864          */
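        /* e.g. with q_len = 8: idx 5 -> 4, and idx 0 wraps to 7 */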
6865         return (idx + q_len - 1) & mask;
6866 }
6867
6868 /**
6869  * gaudi_handle_sw_config_stream_data - print SW config stream data
6870  *
6871  * @hdev: pointer to the habanalabs device structure
6872  * @stream: the QMAN's stream
6873  * @qman_base: base address of QMAN registers block
6874  * @event_mask: mask of the last events that occurred
6875  */
6876 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6877                                                 u64 qman_base, u64 event_mask)
6878 {
6879         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6880         u32 cq_ptr_lo_off, size;
6881
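        /*
         * Stride between consecutive per-stream CQ_PTR_LO registers. The TPC0
         * QMAN offsets are used as a template, assuming all QMANs share the
         * same register layout relative to their base address.
         */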
6882         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6883
6884         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6885                                                 stream * cq_ptr_lo_off;
6886         cq_ptr_hi = cq_ptr_lo +
6887                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6888         cq_tsize = cq_ptr_lo +
6889                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6890
6891         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6892         size = RREG32(cq_tsize);
6893         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6894                                                         stream, cq_ptr, size);
6895
6896         if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6897                 hdev->last_error.undef_opcode.cq_addr = cq_ptr;
6898                 hdev->last_error.undef_opcode.cq_size = size;
6899                 hdev->last_error.undef_opcode.stream_id = stream;
6900         }
6901 }
6902
6903 /**
6904  * gaudi_handle_last_pqes_on_err - print last PQEs on error
6905  *
6906  * @hdev: pointer to the habanalabs device structure
6907  * @qid_base: first QID of the QMAN (out of 4 streams)
6908  * @stream: the QMAN's stream
6909  * @qman_base: base address of QMAN registers block
6910  * @event_mask: mask of the last events that occurred
6911  * @pr_sw_conf: if true, print the SW config stream data (CQ PTR and SIZE)
6912  */
6913 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6914                                                 u32 stream, u64 qman_base,
6915                                                 u64 event_mask,
6916                                                 bool pr_sw_conf)
6917 {
6918         u32 ci, qm_ci_stream_off, queue_len;
6919         struct hl_hw_queue *q;
6920         u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6921         int i;
6922
6923         q = &hdev->kernel_queues[qid_base + stream];
6924
6925         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6926         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6927                                                 stream * qm_ci_stream_off;
6928
6929         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6930                                         q->int_queue_len : HL_QUEUE_LENGTH;
6931
6932         hdev->asic_funcs->hw_queues_lock(hdev);
6933
6934         if (pr_sw_conf)
6935                 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6936
6937         ci = RREG32(pq_ci);
6938
6939         /* we should start printing from ci - 1 */
6940         ci = gaudi_queue_idx_dec(ci, queue_len);
6941         memset(addr, 0, sizeof(addr));
6942
6943         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6944                 struct hl_bd *bd;
6945                 u32 len;
6946
6947                 bd = q->kernel_address;
6948                 bd += ci;
6949
6950                 len = le32_to_cpu(bd->len);
6951                 /* len 0 means an uninitialized entry - break */
6952                 if (!len)
6953                         break;
6954
6955                 addr[i] = le64_to_cpu(bd->ptr);
6956
6957                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6958                                                         stream, ci, addr[i], len);
6959
6960                 /* get previous ci, wrap if needed */
6961                 ci = gaudi_queue_idx_dec(ci, queue_len);
6962         }
6963
6964         if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6965                 struct undefined_opcode_info *undef_opcode = &hdev->last_error.undef_opcode;
6966                 u32 arr_idx = undef_opcode->cb_addr_streams_len;
6967
6968                 if (arr_idx == 0) {
6969                         undef_opcode->timestamp = ktime_get();
6970                         undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6971                 }
6972
6973                 memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6974                 undef_opcode->cb_addr_streams_len++;
6975         }
6976
6977         hdev->asic_funcs->hw_queues_unlock(hdev);
6978 }
6979
6980 /**
6981  * handle_qman_data_on_err - extract QMAN data on error
6982  *
6983  * @hdev: pointer to the habanalabs device structure
6984  * @qid_base: first QID of the QMAN (out of 4 streams)
6985  * @stream: the QMAN's stream
6986  * @qman_base: base address of QMAN registers block
6987  * @event_mask: mask of the last events that occurred
6988  *
6989  * This function attempts to extract as much data as possible on a QMAN error.
6990  * On the upper CP, print the SW config stream data and the last 8 PQEs.
6991  * On the lower CP, print the SW config data and the last PQEs of ALL 4 upper CPs.
6992  */
6993 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6994                                    u32 stream, u64 qman_base, u64 event_mask)
6995 {
6996         u32 i;
6997
6998         if (stream != QMAN_STREAMS) {
6999                 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
7000                         qman_base, event_mask, true);
7001                 return;
7002         }
7003
7004         /* handle Lower-CP */
7005         gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
7006
7007         for (i = 0; i < QMAN_STREAMS; i++)
7008                 gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
7009                         qman_base, event_mask, false);
7010 }
7011
7012 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7013                                           const char *qm_name,
7014                                           u64 qman_base,
7015                                           u32 qid_base,
7016                                           u64 *event_mask)
7017 {
7018         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7019         u64 glbl_sts_addr, arb_err_addr;
7020         char reg_desc[32];
7021
7022         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7023         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7024
7025         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7026         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7027                 glbl_sts_clr_val = 0;
7028                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7029
7030                 if (!glbl_sts_val)
7031                         continue;
7032
7033                 if (i == QMAN_STREAMS)
7034                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7035                 else
7036                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7037
7038                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7039                         if (glbl_sts_val & BIT(j)) {
7040                                 dev_err_ratelimited(hdev->dev,
7041                                                 "%s %s. err cause: %s\n",
7042                                                 qm_name, reg_desc,
7043                                                 gaudi_qman_error_cause[j]);
7044                                 glbl_sts_clr_val |= BIT(j);
7045                         }
7046                 }
7047                 /* check for undefined opcode */
7048                 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
7049                                 hdev->last_error.undef_opcode.write_enable) {
7050                         memset(&hdev->last_error.undef_opcode, 0,
7051                                                 sizeof(hdev->last_error.undef_opcode));
7052
7053                         hdev->last_error.undef_opcode.write_enable = false;
7054                         *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
7055                 }
7056
7057                 /* Write 1 to clear errors */
7058                 if (!hdev->stop_on_err)
7059                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7060                 else
7061                         handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
7062         }
7063
7064         arb_err_val = RREG32(arb_err_addr);
7065
7066         if (!arb_err_val)
7067                 return;
7068
7069         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7070                 if (arb_err_val & BIT(j)) {
7071                         dev_err_ratelimited(hdev->dev,
7072                                         "%s ARB_ERR. err cause: %s\n",
7073                                         qm_name,
7074                                         gaudi_qman_arb_error_cause[j]);
7075                 }
7076         }
7077 }
7078
7079 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7080                 struct hl_eq_sm_sei_data *sei_data)
7081 {
7082         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7083
7084         /* Flip the bits as the enum is ordered in the opposite way */
7085         index = (index ^ 0x3) & 0x3;
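        /* e.g. index 0 (DMA_IF_SEI_0) maps to gaudi_sync_manager_names[3],
         * and index 3 maps to gaudi_sync_manager_names[0]
         */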
7086
7087         switch (sei_data->sei_cause) {
7088         case SM_SEI_SO_OVERFLOW:
7089                 dev_err_ratelimited(hdev->dev,
7090                         "%s SEI Error: SOB Group %u overflow/underflow",
7091                         gaudi_sync_manager_names[index],
7092                         le32_to_cpu(sei_data->sei_log));
7093                 break;
7094         case SM_SEI_LBW_4B_UNALIGNED:
7095                 dev_err_ratelimited(hdev->dev,
7096                         "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7097                         gaudi_sync_manager_names[index],
7098                         le32_to_cpu(sei_data->sei_log));
7099                 break;
7100         case SM_SEI_AXI_RESPONSE_ERR:
7101                 dev_err_ratelimited(hdev->dev,
7102                         "%s SEI Error: AXI ID %u response error",
7103                         gaudi_sync_manager_names[index],
7104                         le32_to_cpu(sei_data->sei_log));
7105                 break;
7106         default:
7107                 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7108                                 le32_to_cpu(sei_data->sei_log));
7109                 break;
7110         }
7111 }
7112
7113 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7114                 struct hl_eq_ecc_data *ecc_data)
7115 {
7116         struct ecc_info_extract_params params;
7117         u64 ecc_address = 0, ecc_syndrom = 0;
7118         u8 index, memory_wrapper_idx = 0;
7119         bool extract_info_from_fw;
7120         int rc;
7121
7122         if (hdev->asic_prop.fw_security_enabled) {
7123                 extract_info_from_fw = true;
7124                 goto extract_ecc_info;
7125         }
7126
7127         switch (event_type) {
7128         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7129         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7130                 extract_info_from_fw = true;
7131                 break;
7132         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7133                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7134                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7135                 params.num_memories = 90;
7136                 params.derr = false;
7137                 extract_info_from_fw = false;
7138                 break;
7139         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7140                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7141                 params.block_address =
7142                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7143                 params.num_memories = 90;
7144                 params.derr = true;
7145                 extract_info_from_fw = false;
7146                 break;
7147         case GAUDI_EVENT_MME0_ACC_SERR:
7148         case GAUDI_EVENT_MME1_ACC_SERR:
7149         case GAUDI_EVENT_MME2_ACC_SERR:
7150         case GAUDI_EVENT_MME3_ACC_SERR:
7151                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7152                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7153                 params.num_memories = 128;
7154                 params.derr = false;
7155                 extract_info_from_fw = false;
7156                 break;
7157         case GAUDI_EVENT_MME0_ACC_DERR:
7158         case GAUDI_EVENT_MME1_ACC_DERR:
7159         case GAUDI_EVENT_MME2_ACC_DERR:
7160         case GAUDI_EVENT_MME3_ACC_DERR:
7161                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7162                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7163                 params.num_memories = 128;
7164                 params.derr = true;
7165                 extract_info_from_fw = false;
7166                 break;
7167         case GAUDI_EVENT_MME0_SBAB_SERR:
7168         case GAUDI_EVENT_MME1_SBAB_SERR:
7169         case GAUDI_EVENT_MME2_SBAB_SERR:
7170         case GAUDI_EVENT_MME3_SBAB_SERR:
7171                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7172                 params.block_address =
7173                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7174                 params.num_memories = 33;
7175                 params.derr = false;
7176                 extract_info_from_fw = false;
7177                 break;
7178         case GAUDI_EVENT_MME0_SBAB_DERR:
7179         case GAUDI_EVENT_MME1_SBAB_DERR:
7180         case GAUDI_EVENT_MME2_SBAB_DERR:
7181         case GAUDI_EVENT_MME3_SBAB_DERR:
7182                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7183                 params.block_address =
7184                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7185                 params.num_memories = 33;
7186                 params.derr = true;
7187                 extract_info_from_fw = false;
7188                 break;
7189         default:
7190                 return;
7191         }
7192
7193 extract_ecc_info:
7194         if (extract_info_from_fw) {
7195                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7196                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7197                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7198         } else {
7199                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7200                                 &ecc_syndrom, &memory_wrapper_idx);
7201                 if (rc)
7202                         return;
7203         }
7204
7205         dev_err(hdev->dev,
7206                 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7207                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7208 }
7209
7210 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7211 {
7212         u64 qman_base;
7213         char desc[32];
7214         u32 qid_base;
7215         u8 index;
7216
7217         switch (event_type) {
7218         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7219                 /* On a TPC QM event, notify on a TPC assertion. While there isn't
7220                  * a specific event for an assertion yet, the FW generates a QM event.
7221                  * The SW upper layer will inspect an internal mapped area to determine
7222                  * whether the event is a TPC assertion or a TPC QM error.
7223                  */
7224                 *event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7225                 index = event_type - GAUDI_EVENT_TPC0_QM;
7226                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7227                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7228                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7229                 break;
7230         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7231                 if (event_type == GAUDI_EVENT_MME0_QM) {
7232                         index = 0;
7233                         qid_base = GAUDI_QUEUE_ID_MME_0_0;
7234                 } else { /* event_type == GAUDI_EVENT_MME2_QM */
7235                         index = 2;
7236                         qid_base = GAUDI_QUEUE_ID_MME_1_0;
7237                 }
7238                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7239                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7240                 break;
7241         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7242                 index = event_type - GAUDI_EVENT_DMA0_QM;
7243                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7244                 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7245                 if (index > 1)
7246                         qid_base++;
7247                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7248                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7249                 break;
7250         case GAUDI_EVENT_NIC0_QM0:
7251                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7252                 qman_base = mmNIC0_QM0_BASE;
7253                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7254                 break;
7255         case GAUDI_EVENT_NIC0_QM1:
7256                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7257                 qman_base = mmNIC0_QM1_BASE;
7258                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7259                 break;
7260         case GAUDI_EVENT_NIC1_QM0:
7261                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7262                 qman_base = mmNIC1_QM0_BASE;
7263                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7264                 break;
7265         case GAUDI_EVENT_NIC1_QM1:
7266                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7267                 qman_base = mmNIC1_QM1_BASE;
7268                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7269                 break;
7270         case GAUDI_EVENT_NIC2_QM0:
7271                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7272                 qman_base = mmNIC2_QM0_BASE;
7273                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7274                 break;
7275         case GAUDI_EVENT_NIC2_QM1:
7276                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7277                 qman_base = mmNIC2_QM1_BASE;
7278                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7279                 break;
7280         case GAUDI_EVENT_NIC3_QM0:
7281                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7282                 qman_base = mmNIC3_QM0_BASE;
7283                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7284                 break;
7285         case GAUDI_EVENT_NIC3_QM1:
7286                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7287                 qman_base = mmNIC3_QM1_BASE;
7288                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7289                 break;
7290         case GAUDI_EVENT_NIC4_QM0:
7291                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7292                 qman_base = mmNIC4_QM0_BASE;
7293                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7294                 break;
7295         case GAUDI_EVENT_NIC4_QM1:
7296                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7297                 qman_base = mmNIC4_QM1_BASE;
7298                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7299                 break;
7300         default:
7301                 return;
7302         }
7303
7304         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7305 }
7306
7307 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7308                                         bool razwi)
7309 {
7310         u32 engine_id_1, engine_id_2;
7311         char desc[64] = "";
7312         u64 razwi_addr = 0;
7313         u8 razwi_type;
7314         int rc;
7315
7316         /*
7317          * Init the engine ids as not valid by default; they get a valid value only if
7318          * the razwi was initiated by an engine that has an engine id.
7319          * Init the razwi type to its default value; it is changed only if the razwi was
7320          * caused by a page fault or an MMU access error.
7321          */
7322         engine_id_1 = U16_MAX;
7323         engine_id_2 = U16_MAX;
7324         razwi_type = U8_MAX;
7325
7326         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7327         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7328                 event_type, desc);
7329
7330         if (razwi) {
7331                 gaudi_print_and_get_razwi_info(hdev, &engine_id_1, &engine_id_2);
7332                 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);
7333
7334                 /* In case it's the first razwi, save its parameters (the cmpxchg below makes this a one-shot capture) */
7335                 rc = atomic_cmpxchg(&hdev->last_error.razwi.write_enable, 1, 0);
7336                 if (rc) {
7337                         hdev->last_error.razwi.timestamp = ktime_get();
7338                         hdev->last_error.razwi.addr = razwi_addr;
7339                         hdev->last_error.razwi.engine_id_1 = engine_id_1;
7340                         hdev->last_error.razwi.engine_id_2 = engine_id_2;
7341                         /*
7342                          * If the first engine id holds a non-valid value, the razwi
7343                          * initiator does not have an engine id
7344                          */
7345                         hdev->last_error.razwi.non_engine_initiator = (engine_id_1 == U16_MAX);
7346                         hdev->last_error.razwi.type = razwi_type;
7347
7348                 }
7349         }
7350 }
7351
7352 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7353                                         struct cpucp_pkt_sync_err *sync_err)
7354 {
7355         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7356
7357         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7358                         sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7359 }
7360
7361 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7362                                         struct hl_eq_fw_alive *fw_alive)
7363 {
7364         dev_err(hdev->dev,
7365                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7366                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7367                 "Minor" : "Critical", fw_alive->process_id,
7368                 fw_alive->thread_id, fw_alive->uptime_seconds);
7369 }
7370
7371 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7372                                                 void *data)
7373 {
7374         char desc[64] = "", *type;
7375         struct eq_nic_sei_event *eq_nic_sei = data;
7376         u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7377
7378         switch (eq_nic_sei->axi_error_cause) {
7379         case RXB:
7380                 type = "RXB";
7381                 break;
7382         case RXE:
7383                 type = "RXE";
7384                 break;
7385         case TXS:
7386                 type = "TXS";
7387                 break;
7388         case TXE:
7389                 type = "TXE";
7390                 break;
7391         case QPC_RESP:
7392                 type = "QPC_RESP";
7393                 break;
7394         case NON_AXI_ERR:
7395                 type = "NON_AXI_ERR";
7396                 break;
7397         case TMR:
7398                 type = "TMR";
7399                 break;
7400         default:
7401                 dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7402                         eq_nic_sei->axi_error_cause);
7403                 type = "N/A";
7404                 break;
7405         }
7406
7407         snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7408                         eq_nic_sei->id);
7409         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7410                 event_type, desc);
7411 }
7412
7413 static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7414 {
7415         /* GAUDI doesn't support any reset except hard-reset */
7416         return -EPERM;
7417 }
7418
7419 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7420                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7421 {
7422         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7423         int rc = 0;
7424
7425         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7426                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7427                 if (!hbm_ecc_data) {
7428                         dev_err(hdev->dev, "No FW ECC data");
7429                         return 0;
7430                 }
7431
7432                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7433                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7434                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7435                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7436                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7437                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7438                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7439                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7440                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7441                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7442                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7443                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7444                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7445                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7446
7447                 dev_err(hdev->dev,
7448                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7449                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7450                 dev_err(hdev->dev,
7451                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7452                         device, ch, hbm_ecc_data->first_addr, type,
7453                         hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7454                         hbm_ecc_data->dec_cnt);
7455                 return 0;
7456         }
7457
7458         if (hdev->asic_prop.fw_security_enabled) {
7459                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7460                 return 0;
7461         }
7462
7463         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7464         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7465                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7466                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7467                 if (val) {
7468                         rc = -EIO;
7469                         dev_err(hdev->dev,
7470                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7471                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7472                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7473                                 (val >> 4) & 0x1);
7474
7475                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7476                         dev_err(hdev->dev,
7477                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7478                                 device, ch * 2,
7479                                 RREG32(base + ch * 0x1000 + 0x064),
7480                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7481                                 (val2 & 0xFF0000) >> 16,
7482                                 (val2 & 0xFF000000) >> 24);
7483                 }
7484
7485                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7486                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7487                 if (val) {
7488                         rc = -EIO;
7489                         dev_err(hdev->dev,
7490                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7491                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7492                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7493                                 (val >> 4) & 0x1);
7494
7495                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7496                         dev_err(hdev->dev,
7497                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7498                                 device, ch * 2 + 1,
7499                                 RREG32(base + ch * 0x1000 + 0x074),
7500                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7501                                 (val2 & 0xFF0000) >> 16,
7502                                 (val2 & 0xFF000000) >> 24);
7503                 }
7504
7505                 /* Clear interrupts */
7506                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7507                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7508                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7509                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7510                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7511                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7512         }
7513
7514         val  = RREG32(base + 0x8F30);
7515         val2 = RREG32(base + 0x8F34);
7516         if (val | val2) {
7517                 rc = -EIO;
7518                 dev_err(hdev->dev,
7519                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7520                         device, val, val2);
7521         }
7522         val  = RREG32(base + 0x8F40);
7523         val2 = RREG32(base + 0x8F44);
7524         if (val | val2) {
7525                 rc = -EIO;
7526                 dev_err(hdev->dev,
7527                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7528                         device, val, val2);
7529         }
7530
7531         return rc;
7532 }
7533
7534 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7535 {
7536         switch (hbm_event_type) {
7537         case GAUDI_EVENT_HBM0_SPI_0:
7538         case GAUDI_EVENT_HBM0_SPI_1:
7539                 return 0;
7540         case GAUDI_EVENT_HBM1_SPI_0:
7541         case GAUDI_EVENT_HBM1_SPI_1:
7542                 return 1;
7543         case GAUDI_EVENT_HBM2_SPI_0:
7544         case GAUDI_EVENT_HBM2_SPI_1:
7545                 return 2;
7546         case GAUDI_EVENT_HBM3_SPI_0:
7547         case GAUDI_EVENT_HBM3_SPI_1:
7548                 return 3;
7549         default:
7550                 break;
7551         }
7552
7553         /* Should never happen */
7554         return 0;
7555 }
7556
7557 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7558                                         char *interrupt_name)
7559 {
7560         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7561         bool soft_reset_required = false;
7562
7563         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7564                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7565
7566         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7567                 if (tpc_interrupts_cause & BIT(i)) {
7568                         dev_err_ratelimited(hdev->dev,
7569                                         "TPC%d_%s interrupt cause: %s\n",
7570                                         tpc_id, interrupt_name,
7571                                         gaudi_tpc_interrupts_cause[i]);
7572                         /* If this is a QM error, we need to soft-reset */
7573                         if (i == 15)
7574                                 soft_reset_required = true;
7575                 }
7576
7577         /* Clear interrupts */
7578         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7579
7580         return soft_reset_required;
7581 }
7582
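/*
 * Per-TPC DEC events are spaced two event IDs apart and per-TPC KRN_ERR
 * events six IDs apart, hence the divisors below.
 */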
7583 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7584 {
7585         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7586 }
7587
7588 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7589 {
7590         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7591 }
7592
7593 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type)
7594 {
7595         ktime_t zero_time = ktime_set(0, 0);
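        /*
         * A zero 'end' timestamp marks a throttling interval that is still
         * in progress; it is filled in when the matching _E event arrives.
         */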
7596
7597         mutex_lock(&hdev->clk_throttling.lock);
7598
7599         switch (event_type) {
7600         case GAUDI_EVENT_FIX_POWER_ENV_S:
7601                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7602                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7603                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7604                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7605                 dev_info_ratelimited(hdev->dev,
7606                         "Clock throttling due to power consumption\n");
7607                 break;
7608
7609         case GAUDI_EVENT_FIX_POWER_ENV_E:
7610                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7611                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7612                 dev_info_ratelimited(hdev->dev,
7613                         "Power envelope is safe, back to optimal clock\n");
7614                 break;
7615
7616         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7617                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7618                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7619                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7620                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7621                 dev_info_ratelimited(hdev->dev,
7622                         "Clock throttling due to overheating\n");
7623                 break;
7624
7625         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7626                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7627                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7628                 dev_info_ratelimited(hdev->dev,
7629                         "Thermal envelope is safe, back to optimal clock\n");
7630                 break;
7631
7632         default:
7633                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7634                         event_type);
7635                 break;
7636         }
7637
7638         mutex_unlock(&hdev->clk_throttling.lock);
7639 }
7640
7641 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7642 {
7643         struct gaudi_device *gaudi = hdev->asic_specific;
7644         u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7645         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7646         u32 fw_fatal_err_flag = 0, flags = 0;
7647         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7648                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7649         bool reset_required, reset_direct = false;
7650         u8 cause;
7651         int rc;
7652
7653         if (event_type >= GAUDI_EVENT_SIZE) {
7654                 dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7655                                 event_type, GAUDI_EVENT_SIZE - 1);
7656                 return;
7657         }
7658
7659         gaudi->events_stat[event_type]++;
7660         gaudi->events_stat_aggregate[event_type]++;
7661
7662         switch (event_type) {
7663         case GAUDI_EVENT_PCIE_CORE_DERR:
7664         case GAUDI_EVENT_PCIE_IF_DERR:
7665         case GAUDI_EVENT_PCIE_PHY_DERR:
7666         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7667         case GAUDI_EVENT_MME0_ACC_DERR:
7668         case GAUDI_EVENT_MME0_SBAB_DERR:
7669         case GAUDI_EVENT_MME1_ACC_DERR:
7670         case GAUDI_EVENT_MME1_SBAB_DERR:
7671         case GAUDI_EVENT_MME2_ACC_DERR:
7672         case GAUDI_EVENT_MME2_SBAB_DERR:
7673         case GAUDI_EVENT_MME3_ACC_DERR:
7674         case GAUDI_EVENT_MME3_SBAB_DERR:
7675         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7676                 fallthrough;
7677         case GAUDI_EVENT_CPU_IF_ECC_DERR:
7678         case GAUDI_EVENT_PSOC_MEM_DERR:
7679         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7680         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7681         case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7682         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7683         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7684         case GAUDI_EVENT_MMU_DERR:
7685         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7686                 gaudi_print_irq_info(hdev, event_type, true);
7687                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7688                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7689                 goto reset_device;
7690
7691         case GAUDI_EVENT_GIC500:
7692         case GAUDI_EVENT_AXI_ECC:
7693         case GAUDI_EVENT_L2_RAM_ECC:
7694         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7695                 gaudi_print_irq_info(hdev, event_type, false);
7696                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7697                 goto reset_device;
7698
7699         case GAUDI_EVENT_HBM0_SPI_0:
7700         case GAUDI_EVENT_HBM1_SPI_0:
7701         case GAUDI_EVENT_HBM2_SPI_0:
7702         case GAUDI_EVENT_HBM3_SPI_0:
7703                 gaudi_print_irq_info(hdev, event_type, false);
7704                 gaudi_hbm_read_interrupts(hdev,
7705                                 gaudi_hbm_event_to_dev(event_type),
7706                                 &eq_entry->hbm_ecc_data);
7707                 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7708                 goto reset_device;
7709
7710         case GAUDI_EVENT_HBM0_SPI_1:
7711         case GAUDI_EVENT_HBM1_SPI_1:
7712         case GAUDI_EVENT_HBM2_SPI_1:
7713         case GAUDI_EVENT_HBM3_SPI_1:
7714                 gaudi_print_irq_info(hdev, event_type, false);
7715                 gaudi_hbm_read_interrupts(hdev,
7716                                 gaudi_hbm_event_to_dev(event_type),
7717                                 &eq_entry->hbm_ecc_data);
7718                 hl_fw_unmask_irq(hdev, event_type);
7719                 break;
7720
7721         case GAUDI_EVENT_TPC0_DEC:
7722         case GAUDI_EVENT_TPC1_DEC:
7723         case GAUDI_EVENT_TPC2_DEC:
7724         case GAUDI_EVENT_TPC3_DEC:
7725         case GAUDI_EVENT_TPC4_DEC:
7726         case GAUDI_EVENT_TPC5_DEC:
7727         case GAUDI_EVENT_TPC6_DEC:
7728         case GAUDI_EVENT_TPC7_DEC:
7729                 gaudi_print_irq_info(hdev, event_type, true);
7730                 reset_required = gaudi_tpc_read_interrupts(hdev,
7731                                         tpc_dec_event_to_tpc_id(event_type),
7732                                         "AXI_SLV_DEC_Error");
7733                 if (reset_required) {
7734                         dev_err(hdev->dev, "reset required due to %s\n",
7735                                 gaudi_irq_map_table[event_type].name);
7736
7737                         reset_direct = true;
7738                         goto reset_device;
7739                 } else {
7740                         hl_fw_unmask_irq(hdev, event_type);
7741                 }
7742                 break;
7743
7744         case GAUDI_EVENT_TPC0_KRN_ERR:
7745         case GAUDI_EVENT_TPC1_KRN_ERR:
7746         case GAUDI_EVENT_TPC2_KRN_ERR:
7747         case GAUDI_EVENT_TPC3_KRN_ERR:
7748         case GAUDI_EVENT_TPC4_KRN_ERR:
7749         case GAUDI_EVENT_TPC5_KRN_ERR:
7750         case GAUDI_EVENT_TPC6_KRN_ERR:
7751         case GAUDI_EVENT_TPC7_KRN_ERR:
7752                 gaudi_print_irq_info(hdev, event_type, true);
7753                 reset_required = gaudi_tpc_read_interrupts(hdev,
7754                                         tpc_krn_event_to_tpc_id(event_type),
7755                                         "KRN_ERR");
7756                 if (reset_required) {
7757                         dev_err(hdev->dev, "reset required due to %s\n",
7758                                 gaudi_irq_map_table[event_type].name);
7759
7760                         reset_direct = true;
7761                         goto reset_device;
7762                 } else {
7763                         hl_fw_unmask_irq(hdev, event_type);
7764                 }
7765                 break;
7766
7767         case GAUDI_EVENT_PCIE_CORE_SERR:
7768         case GAUDI_EVENT_PCIE_IF_SERR:
7769         case GAUDI_EVENT_PCIE_PHY_SERR:
7770         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7771         case GAUDI_EVENT_MME0_ACC_SERR:
7772         case GAUDI_EVENT_MME0_SBAB_SERR:
7773         case GAUDI_EVENT_MME1_ACC_SERR:
7774         case GAUDI_EVENT_MME1_SBAB_SERR:
7775         case GAUDI_EVENT_MME2_ACC_SERR:
7776         case GAUDI_EVENT_MME2_SBAB_SERR:
7777         case GAUDI_EVENT_MME3_ACC_SERR:
7778         case GAUDI_EVENT_MME3_SBAB_SERR:
7779         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7780         case GAUDI_EVENT_CPU_IF_ECC_SERR:
7781         case GAUDI_EVENT_PSOC_MEM_SERR:
7782         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7783         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7784         case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7785         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7786         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7787                 fallthrough;
7788         case GAUDI_EVENT_MMU_SERR:
7789                 gaudi_print_irq_info(hdev, event_type, true);
7790                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7791                 hl_fw_unmask_irq(hdev, event_type);
7792                 break;
7793
7794         case GAUDI_EVENT_PCIE_DEC:
7795         case GAUDI_EVENT_MME0_WBC_RSP:
7796         case GAUDI_EVENT_MME0_SBAB0_RSP:
7797         case GAUDI_EVENT_MME1_WBC_RSP:
7798         case GAUDI_EVENT_MME1_SBAB0_RSP:
7799         case GAUDI_EVENT_MME2_WBC_RSP:
7800         case GAUDI_EVENT_MME2_SBAB0_RSP:
7801         case GAUDI_EVENT_MME3_WBC_RSP:
7802         case GAUDI_EVENT_MME3_SBAB0_RSP:
7803         case GAUDI_EVENT_CPU_AXI_SPLITTER:
7804         case GAUDI_EVENT_PSOC_AXI_DEC:
7805         case GAUDI_EVENT_PSOC_PRSTN_FALL:
7806         case GAUDI_EVENT_MMU_PAGE_FAULT:
7807         case GAUDI_EVENT_MMU_WR_PERM:
7808         case GAUDI_EVENT_RAZWI_OR_ADC:
7809         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7810         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7811                 fallthrough;
7812         case GAUDI_EVENT_NIC0_QM0:
7813         case GAUDI_EVENT_NIC0_QM1:
7814         case GAUDI_EVENT_NIC1_QM0:
7815         case GAUDI_EVENT_NIC1_QM1:
7816         case GAUDI_EVENT_NIC2_QM0:
7817         case GAUDI_EVENT_NIC2_QM1:
7818         case GAUDI_EVENT_NIC3_QM0:
7819         case GAUDI_EVENT_NIC3_QM1:
7820         case GAUDI_EVENT_NIC4_QM0:
7821         case GAUDI_EVENT_NIC4_QM1:
7822         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7823         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7824                 gaudi_print_irq_info(hdev, event_type, true);
7825                 gaudi_handle_qman_err(hdev, event_type, &event_mask);
7826                 hl_fw_unmask_irq(hdev, event_type);
7827                 break;
7828
7829         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7830                 gaudi_print_irq_info(hdev, event_type, true);
7831                 goto reset_device;
7832
7833         case GAUDI_EVENT_TPC0_BMON_SPMU:
7834         case GAUDI_EVENT_TPC1_BMON_SPMU:
7835         case GAUDI_EVENT_TPC2_BMON_SPMU:
7836         case GAUDI_EVENT_TPC3_BMON_SPMU:
7837         case GAUDI_EVENT_TPC4_BMON_SPMU:
7838         case GAUDI_EVENT_TPC5_BMON_SPMU:
7839         case GAUDI_EVENT_TPC6_BMON_SPMU:
7840         case GAUDI_EVENT_TPC7_BMON_SPMU:
7841         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7842                 gaudi_print_irq_info(hdev, event_type, false);
7843                 hl_fw_unmask_irq(hdev, event_type);
7844                 break;
7845
7846         case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7847                 gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7848                 hl_fw_unmask_irq(hdev, event_type);
7849                 break;
7850
7851         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7852                 gaudi_print_irq_info(hdev, event_type, false);
7853                 gaudi_print_sm_sei_info(hdev, event_type,
7854                                         &eq_entry->sm_sei_data);
7855                 rc = hl_state_dump(hdev);
7856                 if (rc)
7857                         dev_err(hdev->dev,
7858                                 "Error during system state dump %d\n", rc);
7859                 hl_fw_unmask_irq(hdev, event_type);
7860                 break;
7861
7862         case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7863                 break;
7864
7865         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7866                 gaudi_print_clk_change_info(hdev, event_type);
7867                 hl_fw_unmask_irq(hdev, event_type);
7868                 break;
7869
7870         case GAUDI_EVENT_PSOC_GPIO_U16_0:
7871                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7872                 dev_err(hdev->dev,
7873                         "Received high temp H/W interrupt %d (cause %d)\n",
7874                         event_type, cause);
7875                 break;
7876
7877         case GAUDI_EVENT_DEV_RESET_REQ:
7878                 gaudi_print_irq_info(hdev, event_type, false);
7879                 goto reset_device;
7880
7881         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7882                 gaudi_print_irq_info(hdev, event_type, false);
7883                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7884                 goto reset_device;
7885
7886         case GAUDI_EVENT_FW_ALIVE_S:
7887                 gaudi_print_irq_info(hdev, event_type, false);
7888                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7889                 goto reset_device;
7890
7891         default:
7892                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7893                                 event_type);
7894                 break;
7895         }
7896
7897         if (event_mask)
7898                 hl_notifier_event_send_all(hdev, event_mask);
7899
7900         return;
7901
7902 reset_device:
7903         reset_required = true;
7904
7905         if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7906                 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7907
7908                 /* notify on device unavailable while the reset is triggered by FW */
7909                 event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7910                                         HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7911         } else if (hdev->hard_reset_on_fw_events) {
7912                 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7913                 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7914         } else {
7915                 reset_required = false;
7916         }
7917
7918         /* Even if the reset is not executed, a notification on the
7919          * occurred event still needs to be sent here
7920          */
7921         hl_notifier_event_send_all(hdev, event_mask);
7922         if (reset_required)
7923                 hl_device_reset(hdev, flags);
7924         else
7925                 hl_fw_unmask_irq(hdev, event_type);
7926 }
7927
7928 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7929 {
7930         struct gaudi_device *gaudi = hdev->asic_specific;
7931
7932         if (aggregate) {
7933                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
7934                 return gaudi->events_stat_aggregate;
7935         }
7936
7937         *size = (u32) sizeof(gaudi->events_stat);
7938         return gaudi->events_stat;
7939 }
7940
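/*
 * Invalidate the MMU STLB caches (L0 & L1). The sequence below kicks the
 * invalidation by writing an incrementing invalidation index to
 * mmSTLB_CACHE_INV and then polls mmSTLB_INV_PS until it reads zero, which
 * indicates the invalidation has completed (or the timeout expired). Skipped
 * when the MMU was never initialized or a hard reset is already pending.
 */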
7941 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7942 {
7943         struct gaudi_device *gaudi = hdev->asic_specific;
7944         u32 status, timeout_usec;
7945         int rc;
7946
7947         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7948                 hdev->reset_info.hard_reset_pending)
7949                 return 0;
7950
7951         if (hdev->pldm)
7952                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7953         else
7954                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7955
7956         /* L0 & L1 invalidation */
7957         WREG32(mmSTLB_INV_PS, 3);
7958         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7959         WREG32(mmSTLB_INV_PS, 2);
7960
7961         rc = hl_poll_timeout(
7962                 hdev,
7963                 mmSTLB_INV_PS,
7964                 status,
7965                 !status,
7966                 1000,
7967                 timeout_usec);
7968
7969         WREG32(mmSTLB_INV_SET, 0);
7970
7971         return rc;
7972 }
7973
7974 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7975                                                 bool is_hard, u32 flags,
7976                                                 u32 asid, u64 va, u64 size)
7977 {
7978         /* Treat as invalidate all because there is no range invalidation
7979          * in Gaudi
7980          */
7981         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
7982 }
7983
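/*
 * Program the hop0 (page-table root) physical address for the given ASID:
 * write the ASID and the split physical address, trigger the update through
 * MMU_BUSY and poll until the busy bit clears or the (PLDM-aware) timeout
 * expires.
 */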
7984 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
7985 {
7986         u32 status, timeout_usec;
7987         int rc;
7988
7989         if (hdev->pldm)
7990                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7991         else
7992                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7993
7994         WREG32(MMU_ASID, asid);
7995         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7996         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
7997         WREG32(MMU_BUSY, 0x80000000);
7998
7999         rc = hl_poll_timeout(
8000                 hdev,
8001                 MMU_BUSY,
8002                 status,
8003                 !(status & 0x80000000),
8004                 1000,
8005                 timeout_usec);
8006
8007         if (rc) {
8008                 dev_err(hdev->dev,
8009                         "Timeout during MMU hop0 config of asid %d\n", asid);
8010                 return rc;
8011         }
8012
8013         return 0;
8014 }
8015
8016 static int gaudi_send_heartbeat(struct hl_device *hdev)
8017 {
8018         struct gaudi_device *gaudi = hdev->asic_specific;
8019
8020         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8021                 return 0;
8022
8023         return hl_fw_send_heartbeat(hdev);
8024 }
8025
8026 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8027 {
8028         struct gaudi_device *gaudi = hdev->asic_specific;
8029         struct asic_fixed_properties *prop = &hdev->asic_prop;
8030         int rc;
8031
8032         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8033                 return 0;
8034
8035         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8036                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8037                                         mmCPU_BOOT_ERR1);
8038         if (rc)
8039                 return rc;
8040
8041         if (!strlen(prop->cpucp_info.card_name))
8042                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8043                                 CARD_NAME_MAX_LEN);
8044
8045         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8046
8047         set_default_power_values(hdev);
8048
8049         return 0;
8050 }
8051
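/*
 * Walk the DMA, TPC, MME and NIC engines and report whether the whole device
 * is idle. An engine is considered idle when its QMAN status registers (and,
 * where relevant, its core/CFG/ARCH status register) report idle. Busy
 * engines are marked in the optional bitmask, and an optional human-readable
 * table is emitted through the engines_data helper.
 */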
8052 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8053                 struct engines_data *e)
8054 {
8055         struct gaudi_device *gaudi = hdev->asic_specific;
8056         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8057         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8058         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8059         unsigned long *mask = (unsigned long *)mask_arr;
8060         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8061         bool is_idle = true, is_eng_idle, is_slave;
8062         u64 offset;
8063         int i, dma_id, port;
8064
8065         if (e)
8066                 hl_engine_data_sprintf(e,
8067                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8068                         "---  -------  ------------  ----------  -------------\n");
8069
8070         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8071                 dma_id = gaudi_dma_assignment[i];
8072                 offset = dma_id * DMA_QMAN_OFFSET;
8073
8074                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8075                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8076                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8077                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8078                                 IS_DMA_IDLE(dma_core_sts0);
8079                 is_idle &= is_eng_idle;
8080
8081                 if (mask && !is_eng_idle)
8082                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8083                 if (e)
8084                         hl_engine_data_sprintf(e, fmt, dma_id,
8085                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8086                                 qm_cgm_sts, dma_core_sts0);
8087         }
8088
8089         if (e)
8090                 hl_engine_data_sprintf(e,
8091                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8092                         "---  -------  ------------  ----------  ----------\n");
8093
8094         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8095                 offset = i * TPC_QMAN_OFFSET;
8096                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8097                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8098                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8099                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8100                                 IS_TPC_IDLE(tpc_cfg_sts);
8101                 is_idle &= is_eng_idle;
8102
8103                 if (mask && !is_eng_idle)
8104                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8105                 if (e)
8106                         hl_engine_data_sprintf(e, fmt, i,
8107                                 is_eng_idle ? "Y" : "N",
8108                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8109         }
8110
8111         if (e)
8112                 hl_engine_data_sprintf(e,
8113                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8114                         "---  -------  ------------  ----------  -----------\n");
8115
8116         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8117                 offset = i * MME_QMAN_OFFSET;
8118                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8119                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8120
8121                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8122                 is_slave = i % 2;
8123                 if (!is_slave) {
8124                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8125                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8126                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8127                 }
8128
8129                 is_idle &= is_eng_idle;
8130
8131                 if (mask && !is_eng_idle)
8132                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8133                 if (e) {
8134                         if (!is_slave)
8135                                 hl_engine_data_sprintf(e, fmt, i,
8136                                         is_eng_idle ? "Y" : "N",
8137                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8138                         else
8139                                 hl_engine_data_sprintf(e, mme_slave_fmt, i,
8140                                         is_eng_idle ? "Y" : "N", "-",
8141                                         "-", mme_arch_sts);
8142                 }
8143         }
8144
8145         if (e)
8146                 hl_engine_data_sprintf(e,
8147                                 "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8148                                 "---  -------  ------------  ----------\n");
8149
8150         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8151                 offset = i * NIC_MACRO_QMAN_OFFSET;
8152                 port = 2 * i;
8153                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8154                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8155                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8156                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8157                         is_idle &= is_eng_idle;
8158
8159                         if (mask && !is_eng_idle)
8160                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8161                         if (e)
8162                                 hl_engine_data_sprintf(e, nic_fmt, port,
8163                                                 is_eng_idle ? "Y" : "N",
8164                                                 qm_glbl_sts0, qm_cgm_sts);
8165                 }
8166
8167                 port = 2 * i + 1;
8168                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8169                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8170                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8171                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8172                         is_idle &= is_eng_idle;
8173
8174                         if (mask && !is_eng_idle)
8175                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8176                         if (e)
8177                                 hl_engine_data_sprintf(e, nic_fmt, port,
8178                                                 is_eng_idle ? "Y" : "N",
8179                                                 qm_glbl_sts0, qm_cgm_sts);
8180                 }
8181         }
8182
8183         if (e)
8184                 hl_engine_data_sprintf(e, "\n");
8185
8186         return is_idle;
8187 }
8188
8189 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8190         __acquires(&gaudi->hw_queues_lock)
8191 {
8192         struct gaudi_device *gaudi = hdev->asic_specific;
8193
8194         spin_lock(&gaudi->hw_queues_lock);
8195 }
8196
8197 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8198         __releases(&gaudi->hw_queues_lock)
8199 {
8200         struct gaudi_device *gaudi = hdev->asic_specific;
8201
8202         spin_unlock(&gaudi->hw_queues_lock);
8203 }
8204
8205 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8206 {
8207         return hdev->pdev->device;
8208 }
8209
8210 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8211                                 size_t max_size)
8212 {
8213         struct gaudi_device *gaudi = hdev->asic_specific;
8214
8215         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8216                 return 0;
8217
8218         return hl_fw_get_eeprom_data(hdev, data, max_size);
8219 }
8220
8221 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8222 {
8223         struct gaudi_device *gaudi = hdev->asic_specific;
8224
8225         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8226                 return 0;
8227
8228         return hl_fw_get_monitor_dump(hdev, data);
8229 }
8230
8231 /*
8232  * this function should be used only during initialization and/or after reset,
8233  * when there are no active users.
8234  */
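/*
 * Rough sequence (as implemented below): program the kernel base address into
 * the QM kernel and ICACHE base registers (plus a valid LUT pointer), trigger
 * an ICACHE invalidate + 64KB prefetch, wait for the vector pipe to report
 * empty, then issue TPC_EXECUTE and wait for both the vector pipe and the
 * work-queue in-flight counter to drain before returning.
 */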
8235 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8236 {
8237         u64 kernel_timeout;
8238         u32 status, offset;
8239         int rc;
8240
8241         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8242
8243         if (hdev->pldm)
8244                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8245         else
8246                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8247
8248         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8249                         lower_32_bits(tpc_kernel));
8250         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8251                         upper_32_bits(tpc_kernel));
8252
8253         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8254                         lower_32_bits(tpc_kernel));
8255         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8256                         upper_32_bits(tpc_kernel));
8257         /* set a valid LUT pointer, content is of no significance */
8258         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8259                         lower_32_bits(tpc_kernel));
8260         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8261                         upper_32_bits(tpc_kernel));
8262
8263         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8264                         lower_32_bits(CFG_BASE +
8265                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8266
8267         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8268                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8269                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8270         /* wait a bit for the engine to start executing */
8271         usleep_range(1000, 1500);
8272
8273         /* wait until engine has finished executing */
8274         rc = hl_poll_timeout(
8275                 hdev,
8276                 mmTPC0_CFG_STATUS + offset,
8277                 status,
8278                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8279                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8280                 1000,
8281                 kernel_timeout);
8282
8283         if (rc) {
8284                 dev_err(hdev->dev,
8285                         "Timeout while waiting for TPC%d icache prefetch\n",
8286                         tpc_id);
8287                 return -EIO;
8288         }
8289
8290         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8291                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8292
8293         /* wait a bit for the engine to start executing */
8294         usleep_range(1000, 1500);
8295
8296         /* wait until engine has finished executing */
8297         rc = hl_poll_timeout(
8298                 hdev,
8299                 mmTPC0_CFG_STATUS + offset,
8300                 status,
8301                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8302                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8303                 1000,
8304                 kernel_timeout);
8305
8306         if (rc) {
8307                 dev_err(hdev->dev,
8308                         "Timeout while waiting for TPC%d vector pipe\n",
8309                         tpc_id);
8310                 return -EIO;
8311         }
8312
8313         rc = hl_poll_timeout(
8314                 hdev,
8315                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8316                 status,
8317                 (status == 0),
8318                 1000,
8319                 kernel_timeout);
8320
8321         if (rc) {
8322                 dev_err(hdev->dev,
8323                         "Timeout while waiting for TPC%d kernel to execute\n",
8324                         tpc_id);
8325                 return -EIO;
8326         }
8327
8328         return 0;
8329 }
8330
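/*
 * Set up the per-context pool of internal (collective) command buffers:
 * allocate a coherent host buffer, expose it through a gen_pool whose minimal
 * allocation order fits one collective CB (five MSG_SHORT packets plus a
 * FENCE packet), then reserve a host VA block and map the buffer into the
 * device MMU so the engines can fetch CBs from it. Only needed when the MMU
 * is in use.
 */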
8331 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8332                 struct hl_ctx *ctx)
8333 {
8334         struct gaudi_device *gaudi = hdev->asic_specific;
8335         int min_alloc_order, rc, collective_cb_size;
8336
8337         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8338                 return 0;
8339
8340         hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8341                                                         HOST_SPACE_INTERNAL_CB_SZ,
8342                                                         &hdev->internal_cb_pool_dma_addr,
8343                                                         GFP_KERNEL | __GFP_ZERO);
8344
8345         if (!hdev->internal_cb_pool_virt_addr)
8346                 return -ENOMEM;
8347
8348         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8349                         sizeof(struct packet_fence);
8350         min_alloc_order = ilog2(collective_cb_size);
8351
8352         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8353         if (!hdev->internal_cb_pool) {
8354                 dev_err(hdev->dev,
8355                         "Failed to create internal CB pool\n");
8356                 rc = -ENOMEM;
8357                 goto free_internal_cb_pool;
8358         }
8359
8360         rc = gen_pool_add(hdev->internal_cb_pool,
8361                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8362                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8363         if (rc) {
8364                 dev_err(hdev->dev,
8365                         "Failed to add memory to internal CB pool\n");
8366                 rc = -EFAULT;
8367                 goto destroy_internal_cb_pool;
8368         }
8369
8370         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8371                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8372                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8373
8374         if (!hdev->internal_cb_va_base) {
8375                 rc = -ENOMEM;
8376                 goto destroy_internal_cb_pool;
8377         }
8378
8379         mutex_lock(&ctx->mmu_lock);
8380         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8381                         hdev->internal_cb_pool_dma_addr,
8382                         HOST_SPACE_INTERNAL_CB_SZ);
8383
8384         hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8385         mutex_unlock(&ctx->mmu_lock);
8386
8387         if (rc)
8388                 goto unreserve_internal_cb_pool;
8389
8390         return 0;
8391
8392 unreserve_internal_cb_pool:
8393         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8394                         HOST_SPACE_INTERNAL_CB_SZ);
8395 destroy_internal_cb_pool:
8396         gen_pool_destroy(hdev->internal_cb_pool);
8397 free_internal_cb_pool:
8398         hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8399                                         hdev->internal_cb_pool_dma_addr);
8400
8401         return rc;
8402 }
8403
8404 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8405                 struct hl_ctx *ctx)
8406 {
8407         struct gaudi_device *gaudi = hdev->asic_specific;
8408
8409         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8410                 return;
8411
8412         mutex_lock(&ctx->mmu_lock);
8413         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8414                         HOST_SPACE_INTERNAL_CB_SZ);
8415         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8416                         HOST_SPACE_INTERNAL_CB_SZ);
8417         hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8418         mutex_unlock(&ctx->mmu_lock);
8419
8420         gen_pool_destroy(hdev->internal_cb_pool);
8421
8422         hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8423                                         hdev->internal_cb_pool_dma_addr);
8424 }
8425
8426 static int gaudi_ctx_init(struct hl_ctx *ctx)
8427 {
8428         int rc;
8429
8430         if (ctx->asid == HL_KERNEL_ASID_ID)
8431                 return 0;
8432
8433         rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8434         if (rc)
8435                 return rc;
8436
8437         rc = gaudi_restore_user_registers(ctx->hdev);
8438         if (rc)
8439                 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8440
8441         return rc;
8442 }
8443
8444 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8445 {
8446         if (ctx->asid == HL_KERNEL_ASID_ID)
8447                 return;
8448
8449         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8450 }
8451
8452 static int gaudi_pre_schedule_cs(struct hl_cs *cs)
8453 {
8454         return 0;
8455 }
8456
8457 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8458 {
8459         return gaudi_cq_assignment[cq_idx];
8460 }
8461
8462 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8463 {
8464         return sizeof(struct packet_msg_short) +
8465                         sizeof(struct packet_msg_prot) * 2;
8466 }
8467
8468 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8469 {
8470         return sizeof(struct packet_msg_short) * 4 +
8471                         sizeof(struct packet_fence) +
8472                         sizeof(struct packet_msg_prot) * 2;
8473 }
8474
8475 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8476 {
8477         return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8478 }
8479
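/*
 * Append a signal packet to the given CB: a single MSG_SHORT that adds 1 to
 * the selected sync object (W_S SOB base), with the engine-barrier bit taken
 * from the caller. Returns the new CB size.
 */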
8480 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8481                                 u32 size, bool eb)
8482 {
8483         struct hl_cb *cb = (struct hl_cb *) data;
8484         struct packet_msg_short *pkt;
8485         u32 value, ctl, pkt_size = sizeof(*pkt);
8486
8487         pkt = cb->kernel_address + size;
8488         memset(pkt, 0, pkt_size);
8489
8490         /* Inc by 1, Mode ADD */
8491         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8492         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8493
8494         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8495         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8496         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8497         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8498         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8499         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8500         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8501
8502         pkt->value = cpu_to_le32(value);
8503         pkt->ctl = cpu_to_le32(ctl);
8504
8505         return size + pkt_size;
8506 }
8507
8508 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8509                                         u16 addr)
8510 {
8511         u32 ctl, pkt_size = sizeof(*pkt);
8512
8513         memset(pkt, 0, pkt_size);
8514
8515         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8516         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8517         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8518         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8519         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8520         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8521
8522         pkt->value = cpu_to_le32(value);
8523         pkt->ctl = cpu_to_le32(ctl);
8524
8525         return pkt_size;
8526 }
8527
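/*
 * Arm a sync manager monitor: a MSG_SHORT against the W_S monitor base that
 * writes the ARM register of mon_id, binding the monitor to a group of sync
 * objects (sob_base / 8), a comparison mask and a target value, using the
 * GREATER-OR-EQUAL mode. Returns 0 if the SOB base/mask combination is
 * invalid.
 */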
8528 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8529                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8530                 u16 sob_val, u16 mon_id)
8531 {
8532         u64 monitor_base;
8533         u32 ctl, value, pkt_size = sizeof(*pkt);
8534         u16 msg_addr_offset;
8535         u8 mask;
8536
8537         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8538                 dev_err(hdev->dev,
8539                         "sob_base %u (mask %#x) is not valid\n",
8540                         sob_base, sob_mask);
8541                 return 0;
8542         }
8543
8544         /*
8545          * monitor_base should be the content of the base0 address registers,
8546          * so it will be added to the msg short offsets
8547          */
8548         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8549
8550         msg_addr_offset =
8551                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8552                                 monitor_base;
8553
8554         memset(pkt, 0, pkt_size);
8555
8556         /* Monitor config packet: bind the monitor to a sync object */
8557         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8558         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8559         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8560                         0); /* GREATER OR EQUAL */
8561         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8562
8563         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8564         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8565         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8566         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8567         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8568         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8569         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8570
8571         pkt->value = cpu_to_le32(value);
8572         pkt->ctl = cpu_to_le32(ctl);
8573
8574         return pkt_size;
8575 }
8576
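/*
 * Append a FENCE packet that waits on fence counter 2 of the queue's CP:
 * target value 1, decrement by 1. In the wait flow below, the armed monitor
 * writes 1 to the corresponding CP_FENCE2_RDATA register once the sync
 * objects reach their target, which releases this fence.
 */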
8577 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8578 {
8579         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8580
8581         memset(pkt, 0, pkt_size);
8582
8583         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8584         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8585         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8586
8587         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8588         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8589         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8590         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8591
8592         pkt->cfg = cpu_to_le32(cfg);
8593         pkt->ctl = cpu_to_le32(ctl);
8594
8595         return pkt_size;
8596 }
8597
8598 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8599 {
8600         u32 offset, nic_index;
8601
8602         switch (queue_id) {
8603         case GAUDI_QUEUE_ID_DMA_0_0:
8604                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8605                 break;
8606         case GAUDI_QUEUE_ID_DMA_0_1:
8607                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8608                 break;
8609         case GAUDI_QUEUE_ID_DMA_0_2:
8610                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8611                 break;
8612         case GAUDI_QUEUE_ID_DMA_0_3:
8613                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8614                 break;
8615         case GAUDI_QUEUE_ID_DMA_1_0:
8616                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8617                 break;
8618         case GAUDI_QUEUE_ID_DMA_1_1:
8619                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8620                 break;
8621         case GAUDI_QUEUE_ID_DMA_1_2:
8622                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8623                 break;
8624         case GAUDI_QUEUE_ID_DMA_1_3:
8625                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8626                 break;
8627         case GAUDI_QUEUE_ID_DMA_5_0:
8628                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8629                 break;
8630         case GAUDI_QUEUE_ID_DMA_5_1:
8631                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8632                 break;
8633         case GAUDI_QUEUE_ID_DMA_5_2:
8634                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8635                 break;
8636         case GAUDI_QUEUE_ID_DMA_5_3:
8637                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8638                 break;
8639         case GAUDI_QUEUE_ID_TPC_7_0:
8640                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8641                 break;
8642         case GAUDI_QUEUE_ID_TPC_7_1:
8643                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8644                 break;
8645         case GAUDI_QUEUE_ID_TPC_7_2:
8646                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8647                 break;
8648         case GAUDI_QUEUE_ID_TPC_7_3:
8649                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8650                 break;
8651         case GAUDI_QUEUE_ID_NIC_0_0:
8652         case GAUDI_QUEUE_ID_NIC_1_0:
8653         case GAUDI_QUEUE_ID_NIC_2_0:
8654         case GAUDI_QUEUE_ID_NIC_3_0:
8655         case GAUDI_QUEUE_ID_NIC_4_0:
8656         case GAUDI_QUEUE_ID_NIC_5_0:
8657         case GAUDI_QUEUE_ID_NIC_6_0:
8658         case GAUDI_QUEUE_ID_NIC_7_0:
8659         case GAUDI_QUEUE_ID_NIC_8_0:
8660         case GAUDI_QUEUE_ID_NIC_9_0:
8661                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8662                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8663                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8664                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8665                 break;
8666         case GAUDI_QUEUE_ID_NIC_0_1:
8667         case GAUDI_QUEUE_ID_NIC_1_1:
8668         case GAUDI_QUEUE_ID_NIC_2_1:
8669         case GAUDI_QUEUE_ID_NIC_3_1:
8670         case GAUDI_QUEUE_ID_NIC_4_1:
8671         case GAUDI_QUEUE_ID_NIC_5_1:
8672         case GAUDI_QUEUE_ID_NIC_6_1:
8673         case GAUDI_QUEUE_ID_NIC_7_1:
8674         case GAUDI_QUEUE_ID_NIC_8_1:
8675         case GAUDI_QUEUE_ID_NIC_9_1:
8676                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8677                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8678                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8679                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8680                 break;
8681         case GAUDI_QUEUE_ID_NIC_0_2:
8682         case GAUDI_QUEUE_ID_NIC_1_2:
8683         case GAUDI_QUEUE_ID_NIC_2_2:
8684         case GAUDI_QUEUE_ID_NIC_3_2:
8685         case GAUDI_QUEUE_ID_NIC_4_2:
8686         case GAUDI_QUEUE_ID_NIC_5_2:
8687         case GAUDI_QUEUE_ID_NIC_6_2:
8688         case GAUDI_QUEUE_ID_NIC_7_2:
8689         case GAUDI_QUEUE_ID_NIC_8_2:
8690         case GAUDI_QUEUE_ID_NIC_9_2:
8691                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8692                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8693                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8694                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8695                 break;
8696         case GAUDI_QUEUE_ID_NIC_0_3:
8697         case GAUDI_QUEUE_ID_NIC_1_3:
8698         case GAUDI_QUEUE_ID_NIC_2_3:
8699         case GAUDI_QUEUE_ID_NIC_3_3:
8700         case GAUDI_QUEUE_ID_NIC_4_3:
8701         case GAUDI_QUEUE_ID_NIC_5_3:
8702         case GAUDI_QUEUE_ID_NIC_6_3:
8703         case GAUDI_QUEUE_ID_NIC_7_3:
8704         case GAUDI_QUEUE_ID_NIC_8_3:
8705         case GAUDI_QUEUE_ID_NIC_9_3:
8706                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8707                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8708                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8709                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8710                 break;
8711         default:
8712                 return -EINVAL;
8713         }
8714
8715         *addr = CFG_BASE + offset;
8716
8717         return 0;
8718 }
8719
8720 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8721 {
8722         u64 monitor_base;
8723         u32 size = 0;
8724         u16 msg_addr_offset;
8725
8726         /*
8727          * monitor_base should be the content of the base0 address registers,
8728          * so it will be added to the msg short offsets
8729          */
8730         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8731
8732         /* First monitor config packet: low address of the sync */
8733         msg_addr_offset =
8734                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8735                                 monitor_base;
8736
8737         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8738                                         msg_addr_offset);
8739
8740         /* Second monitor config packet: high address of the sync */
8741         msg_addr_offset =
8742                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8743                                 monitor_base;
8744
8745         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8746                                         msg_addr_offset);
8747
8748         /*
8749          * Third monitor config packet: the payload, i.e. what to write when the
8750          * sync triggers
8751          */
8752         msg_addr_offset =
8753                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8754                                 monitor_base;
8755
8756         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8757
8758         return size;
8759 }
8760
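/*
 * Build a wait CB: three MSG_SHORT packets configure the monitor payload
 * (low/high address of the queue's fence register and the value 1 to write),
 * a fourth MSG_SHORT arms the monitor on the requested sync objects, and a
 * FENCE packet makes the queue stall until the monitor fires. Returns the
 * updated CB size, or 0 if the queue id has no known fence register.
 */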
8761 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8762                                 struct hl_gen_wait_properties *prop)
8763 {
8764         struct hl_cb *cb = (struct hl_cb *) prop->data;
8765         void *buf = cb->kernel_address;
8766         u64 fence_addr = 0;
8767         u32 size = prop->size;
8768
8769         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8770                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8771                                 prop->q_idx);
8772                 return 0;
8773         }
8774
8775         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8776         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8777                         prop->sob_mask, prop->sob_val, prop->mon_id);
8778         size += gaudi_add_fence_pkt(buf + size);
8779
8780         return size;
8781 }
8782
8783 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8784 {
8785         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8786
8787         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8788                 hw_sob->sob_id);
8789
8790         WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8791                         hw_sob->sob_id * 4, 0);
8792
8793         kref_init(&hw_sob->kref);
8794 }
8795
8796 static u64 gaudi_get_device_time(struct hl_device *hdev)
8797 {
8798         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8799
8800         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8801 }
8802
8803 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8804                                 u32 *block_size, u32 *block_id)
8805 {
8806         return -EPERM;
8807 }
8808
8809 static int gaudi_block_mmap(struct hl_device *hdev,
8810                                 struct vm_area_struct *vma,
8811                                 u32 block_id, u32 block_size)
8812 {
8813         return -EPERM;
8814 }
8815
8816 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8817 {
8818         struct cpu_dyn_regs *dyn_regs =
8819                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8820         u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8821                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8822                         le32_to_cpu(dyn_regs->gic_host_ints_irq);
8823
8824         WREG32(irq_handler_offset,
8825                 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8826 }
8827
8828 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8829 {
8830         return -EINVAL;
8831 }
8832
8833 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8834 {
8835         switch (pll_idx) {
8836         case HL_GAUDI_CPU_PLL: return CPU_PLL;
8837         case HL_GAUDI_PCI_PLL: return PCI_PLL;
8838         case HL_GAUDI_NIC_PLL: return NIC_PLL;
8839         case HL_GAUDI_DMA_PLL: return DMA_PLL;
8840         case HL_GAUDI_MESH_PLL: return MESH_PLL;
8841         case HL_GAUDI_MME_PLL: return MME_PLL;
8842         case HL_GAUDI_TPC_PLL: return TPC_PLL;
8843         case HL_GAUDI_IF_PLL: return IF_PLL;
8844         case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8845         case HL_GAUDI_HBM_PLL: return HBM_PLL;
8846         default: return -EINVAL;
8847         }
8848 }
8849
8850 static int gaudi_add_sync_to_engine_map_entry(
8851         struct hl_sync_to_engine_map *map, u32 reg_value,
8852         enum hl_sync_engine_type engine_type, u32 engine_id)
8853 {
8854         struct hl_sync_to_engine_map_entry *entry;
8855
8856         /* The register value represents a partial address of the sync
8857          * object and is used as a unique identifier. For this we need to
8858          * subtract the lower 32 bits of CFG_BASE from the value.
8859          */
8860         if (reg_value == 0 || reg_value == 0xffffffff)
8861                 return 0;
8862         reg_value -= lower_32_bits(CFG_BASE);
8863
8864         /* create a new hash entry */
8865         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8866         if (!entry)
8867                 return -ENOMEM;
8868         entry->engine_type = engine_type;
8869         entry->engine_id = engine_id;
8870         entry->sync_id = reg_value;
8871         hash_add(map->tb, &entry->node, reg_value);
8872
8873         return 0;
8874 }
8875
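/*
 * Build the sync-object-to-engine map used by the state dump: read the sync
 * object register configured for every TPC, MME (including its sub-engines)
 * and DMA engine, and hash each valid value (after stripping the CFG base)
 * to the owning engine.
 */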
8876 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8877                                 struct hl_sync_to_engine_map *map)
8878 {
8879         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8880         int i, j, rc;
8881         u32 reg_value;
8882
8883         /* Iterate over TPC engines */
8884         for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8885
8886                 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8887                                         sds->props[SP_NEXT_TPC] * i);
8888
8889                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8890                                                         ENGINE_TPC, i);
8891                 if (rc)
8892                         goto free_sync_to_engine_map;
8893         }
8894
8895         /* Iterate over MME engines */
8896         for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8897                 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8898
8899                         reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8900                                                 sds->props[SP_NEXT_MME] * i +
8901                                                 j * sizeof(u32));
8902
8903                         rc = gaudi_add_sync_to_engine_map_entry(
8904                                 map, reg_value, ENGINE_MME,
8905                                 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8906                         if (rc)
8907                                 goto free_sync_to_engine_map;
8908                 }
8909         }
8910
8911         /* Iterate over DMA engines */
8912         for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8913                 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8914                                         sds->props[SP_DMA_QUEUES_OFFSET] * i);
8915                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8916                                                         ENGINE_DMA, i);
8917                 if (rc)
8918                         goto free_sync_to_engine_map;
8919         }
8920
8921         return 0;
8922
8923 free_sync_to_engine_map:
8924         hl_state_dump_free_sync_to_engine_map(map);
8925
8926         return rc;
8927 }
8928
8929 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8930 {
8931         return FIELD_GET(
8932                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8933                 mon->status);
8934 }
8935
8936 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8937 {
8938         const size_t max_write = 10;
8939         u32 gid, mask, sob;
8940         int i, offset;
8941
8942         /* Sync object ID is calculated as follows:
8943          * (8 * group_id + cleared bits in mask)
8944          */
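        /* e.g. with MONITOR_MAX_SOBS == 8, group id 2 and mask bit 3 cleared,
         * the monitored sync object is 2 * 8 + 3 == 19.
         */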
8945         gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8946                         mon->arm_data);
8947         mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8948                         mon->arm_data);
8949
8950         for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8951                 max_write; mask >>= 1, i++) {
8952                 if (!(mask & 1)) {
8953                         sob = gid * MONITOR_MAX_SOBS + i;
8954
8955                         if (offset > 0)
8956                                 offset += snprintf(sobs + offset, max_write,
8957                                                         ", ");
8958
8959                         offset += snprintf(sobs + offset, max_write, "%u", sob);
8960                 }
8961         }
8962 }
8963
8964 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8965                                 struct hl_device *hdev,
8966                                 struct hl_mon_state_dump *mon)
8967 {
8968         const char *name;
8969         char scratch_buf1[BIN_REG_STRING_SIZE],
8970                 scratch_buf2[BIN_REG_STRING_SIZE];
8971         char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
8972
8973         name = hl_state_dump_get_monitor_name(hdev, mon);
8974         if (!name)
8975                 name = "";
8976
8977         gaudi_fill_sobs_from_mon(monitored_sobs, mon);
8978
8979         return hl_snprintf_resize(
8980                 buf, size, offset,
8981                 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
8982                 mon->id, name,
8983                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8984                                 mon->arm_data),
8985                 hl_format_as_binary(
8986                         scratch_buf1, sizeof(scratch_buf1),
8987                         FIELD_GET(
8988                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8989                                 mon->arm_data)),
8990                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
8991                                 mon->arm_data),
8992                 mon->wr_data,
8993                 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
8994                 hl_format_as_binary(
8995                         scratch_buf2, sizeof(scratch_buf2),
8996                         FIELD_GET(
8997                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
8998                                 mon->status)),
8999                 monitored_sobs);
9000 }
9001
9002
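/*
 * Read the CP status and fence counter registers of a single engine's QMAN
 * and append, to the state dump buffer, one line per stream that is
 * currently waiting on a fence.
 */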
9003 static int gaudi_print_fences_single_engine(
9004         struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9005         enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9006         size_t *size, size_t *offset)
9007 {
9008         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9009         int rc = -ENOMEM, i;
9010         u32 *statuses, *fences;
9011
9012         statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9013                         sizeof(*statuses), GFP_KERNEL);
9014         if (!statuses)
9015                 goto out;
9016
9017         fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9018                                 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9019                          sizeof(*fences), GFP_KERNEL);
9020         if (!fences)
9021                 goto free_status;
9022
9023         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9024                 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9025
9026         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9027                                 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9028                 fences[i] = RREG32(base_offset + i * sizeof(u32));
9029
9030         /* Print one line per stream that currently has a fence in progress */
9031         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9032                 u32 fence_id;
9033                 u64 fence_cnt, fence_rdata;
9034                 const char *engine_name;
9035
9036                 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9037                         statuses[i]))
9038                         continue;
9039
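		/*
		 * Compute the CFG-space address of the CP_FENCE<id>_CNT_<stream>
		 * register this stream is blocked on, and derive the matching
		 * CP_FENCE<id>_RDATA_<stream> address by rebasing from the CNT
		 * registers block to the RDATA registers block.
		 */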
9040                 fence_id =
9041                         FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9042                 fence_cnt = base_offset + CFG_BASE +
9043                         sizeof(u32) *
9044                         (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9045                 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9046                                 sds->props[SP_FENCE0_RDATA_OFFSET];
9047                 engine_name = hl_sync_engine_to_string(engine_type);
9048
9049                 rc = hl_snprintf_resize(
9050                         buf, size, offset,
9051                         "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9052                         engine_name, engine_id,
9053                         i, fence_id,
9054                         fence_cnt, engine_name, engine_id, fence_id, i,
9055                         fence_rdata, engine_name, engine_id, fence_id, i,
9056                         fences[fence_id],
9057                         statuses[i]);
9058                 if (rc)
9059                         goto free_fences;
9060         }
9061
9062         rc = 0;
9063
9064 free_fences:
9065         kfree(fences);
9066 free_status:
9067         kfree(statuses);
9068 out:
9069         return rc;
9070 }
9071
9072
9073 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9074         .monitor_valid = gaudi_monitor_valid,
9075         .print_single_monitor = gaudi_print_single_monitor,
9076         .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9077         .print_fences_single_engine = gaudi_print_fences_single_engine,
9078 };
9079
9080 static void gaudi_state_dump_init(struct hl_device *hdev)
9081 {
9082         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9083         int i;
9084
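	/*
	 * Populate the lookup tables that translate sync object and monitor
	 * IDs into human-readable names for the state dump.
	 */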
9085         for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9086                 hash_add(sds->so_id_to_str_tb,
9087                         &gaudi_so_id_to_str[i].node,
9088                         gaudi_so_id_to_str[i].id);
9089
9090         for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9091                 hash_add(sds->monitor_id_to_str_tb,
9092                         &gaudi_monitor_id_to_str[i].node,
9093                         gaudi_monitor_id_to_str[i].id);
9094
9095         sds->props = gaudi_state_dump_specs_props;
9096
9097         sds->sync_namager_names = gaudi_sync_manager_names;
9098
9099         sds->funcs = gaudi_state_dump_funcs;
9100 }
9101
9102 static u32 *gaudi_get_stream_master_qid_arr(void)
9103 {
9104         return gaudi_stream_master;
9105 }
9106
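/*
 * No-op on Gaudi: RAZWI (illegal access) events are reported through the
 * event queue and handled in gaudi_handle_eqe(), so there is nothing to
 * check here.
 */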
9107 static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
9108 {
9109 }
9110
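/*
 * sysfs show handler: report the Infineon voltage regulator (VRM) controller
 * version taken from the CPU-CP info.
 */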
9111 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9112 {
9113         struct hl_device *hdev = dev_get_drvdata(dev);
9114         struct cpucp_info *cpucp_info;
9115
9116         cpucp_info = &hdev->asic_prop.cpucp_info;
9117
9118         return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9119 }
9120
9121 static DEVICE_ATTR_RO(infineon_ver);
9122
9123 static struct attribute *gaudi_vrm_dev_attrs[] = {
9124         &dev_attr_infineon_ver.attr,
9125         NULL,
9126 };
9127
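/*
 * Fill in the device's sysfs attribute groups: the common clock attributes
 * plus the Gaudi-specific VRM group, which currently exposes only
 * infineon_ver.
 */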
9128 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9129                                         struct attribute_group *dev_vrm_attr_grp)
9130 {
9131         hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9132         dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9133 }
9134
9135 static const struct hl_asic_funcs gaudi_funcs = {
9136         .early_init = gaudi_early_init,
9137         .early_fini = gaudi_early_fini,
9138         .late_init = gaudi_late_init,
9139         .late_fini = gaudi_late_fini,
9140         .sw_init = gaudi_sw_init,
9141         .sw_fini = gaudi_sw_fini,
9142         .hw_init = gaudi_hw_init,
9143         .hw_fini = gaudi_hw_fini,
9144         .halt_engines = gaudi_halt_engines,
9145         .suspend = gaudi_suspend,
9146         .resume = gaudi_resume,
9147         .mmap = gaudi_mmap,
9148         .ring_doorbell = gaudi_ring_doorbell,
9149         .pqe_write = gaudi_pqe_write,
9150         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9151         .asic_dma_free_coherent = gaudi_dma_free_coherent,
9152         .scrub_device_mem = gaudi_scrub_device_mem,
9153         .scrub_device_dram = gaudi_scrub_device_dram,
9154         .get_int_queue_base = gaudi_get_int_queue_base,
9155         .test_queues = gaudi_test_queues,
9156         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9157         .asic_dma_pool_free = gaudi_dma_pool_free,
9158         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9159         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9160         .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
9161         .cs_parser = gaudi_cs_parser,
9162         .asic_dma_map_sgtable = hl_dma_map_sgtable,
9163         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9164         .update_eq_ci = gaudi_update_eq_ci,
9165         .context_switch = gaudi_context_switch,
9166         .restore_phase_topology = gaudi_restore_phase_topology,
9167         .debugfs_read_dma = gaudi_debugfs_read_dma,
9168         .add_device_attr = gaudi_add_device_attr,
9169         .handle_eqe = gaudi_handle_eqe,
9170         .get_events_stat = gaudi_get_events_stat,
9171         .read_pte = gaudi_read_pte,
9172         .write_pte = gaudi_write_pte,
9173         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9174         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9175         .mmu_prefetch_cache_range = NULL,
9176         .send_heartbeat = gaudi_send_heartbeat,
9177         .debug_coresight = gaudi_debug_coresight,
9178         .is_device_idle = gaudi_is_device_idle,
9179         .compute_reset_late_init = gaudi_compute_reset_late_init,
9180         .hw_queues_lock = gaudi_hw_queues_lock,
9181         .hw_queues_unlock = gaudi_hw_queues_unlock,
9182         .get_pci_id = gaudi_get_pci_id,
9183         .get_eeprom_data = gaudi_get_eeprom_data,
9184         .get_monitor_dump = gaudi_get_monitor_dump,
9185         .send_cpu_message = gaudi_send_cpu_message,
9186         .pci_bars_map = gaudi_pci_bars_map,
9187         .init_iatu = gaudi_init_iatu,
9188         .rreg = hl_rreg,
9189         .wreg = hl_wreg,
9190         .halt_coresight = gaudi_halt_coresight,
9191         .ctx_init = gaudi_ctx_init,
9192         .ctx_fini = gaudi_ctx_fini,
9193         .pre_schedule_cs = gaudi_pre_schedule_cs,
9194         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9195         .load_firmware_to_device = gaudi_load_firmware_to_device,
9196         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9197         .get_signal_cb_size = gaudi_get_signal_cb_size,
9198         .get_wait_cb_size = gaudi_get_wait_cb_size,
9199         .gen_signal_cb = gaudi_gen_signal_cb,
9200         .gen_wait_cb = gaudi_gen_wait_cb,
9201         .reset_sob = gaudi_reset_sob,
9202         .reset_sob_group = gaudi_reset_sob_group,
9203         .get_device_time = gaudi_get_device_time,
9204         .pb_print_security_errors = NULL,
9205         .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9206         .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9207         .get_dec_base_addr = NULL,
9208         .scramble_addr = hl_mmu_scramble_addr,
9209         .descramble_addr = hl_mmu_descramble_addr,
9210         .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9211         .get_hw_block_id = gaudi_get_hw_block_id,
9212         .hw_block_mmap = gaudi_block_mmap,
9213         .enable_events_from_fw = gaudi_enable_events_from_fw,
9214         .ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9215         .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9216         .init_firmware_preload_params = gaudi_init_firmware_preload_params,
9217         .init_firmware_loader = gaudi_init_firmware_loader,
9218         .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9219         .state_dump_init = gaudi_state_dump_init,
9220         .get_sob_addr = gaudi_get_sob_addr,
9221         .set_pci_memory_regions = gaudi_set_pci_memory_regions,
9222         .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9223         .check_if_razwi_happened = gaudi_check_if_razwi_happened,
9224         .mmu_get_real_page_size = hl_mmu_get_real_page_size,
9225         .access_dev_mem = hl_access_dev_mem,
9226         .set_dram_bar_base = gaudi_set_hbm_bar_base,
9227 };
9228
9229 /**
9230  * gaudi_set_asic_funcs - set GAUDI function pointers
9231  *
9232  * @hdev: pointer to hl_device structure
9233  *
9234  */
9235 void gaudi_set_asic_funcs(struct hl_device *hdev)
9236 {
9237         hdev->asic_funcs = &gaudi_funcs;
9238 }
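
/*
 * Usage sketch (an assumption based on the common habanalabs driver flow,
 * not something defined in this file): during early device initialization
 * the common code is expected to pick the ASIC-specific function table and
 * then dispatch through it, e.g.:
 *
 *	switch (hdev->asic_type) {
 *	case ASIC_GAUDI:
 *		gaudi_set_asic_funcs(hdev);
 *		break;
 *	...
 *	}
 *
 *	rc = hdev->asic_funcs->early_init(hdev);
 */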