habanalabs: add asic property of host dma offset
[linux-2.6-microblaze.git] / drivers / misc / habanalabs / gaudi / gaudi.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2020 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: the driver needs to parse the CB, but WREG must be
43  *                      allowed because of TDMA (tensor DMA). Hence, WREG is
44  *                      never secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61
62 #define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000         /* 1s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN            20
86
87 #define GAUDI_CB_POOL_CB_CNT            512
88 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
97
98 #define GAUDI_ARB_WDT_TIMEOUT           0x1000000
99
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
101                 BIT(GAUDI_ENGINE_ID_MME_0) |\
102                 BIT(GAUDI_ENGINE_ID_MME_2) |\
103                 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
104
105 #define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
106
107 #define GAUDI_PLL_MAX 10
108
109 #define BIN_REG_STRING_SIZE     sizeof("0b10101010101010101010101010101010")
110
111 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
112                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
113                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
114                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
115                 "gaudi cpu eq"
116 };
117
118 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
119         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
120         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
121         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
122         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
123         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
124         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
125         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
126         [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
127 };
128
129 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
130         [0] = GAUDI_QUEUE_ID_DMA_0_0,
131         [1] = GAUDI_QUEUE_ID_DMA_0_1,
132         [2] = GAUDI_QUEUE_ID_DMA_0_2,
133         [3] = GAUDI_QUEUE_ID_DMA_0_3,
134         [4] = GAUDI_QUEUE_ID_DMA_1_0,
135         [5] = GAUDI_QUEUE_ID_DMA_1_1,
136         [6] = GAUDI_QUEUE_ID_DMA_1_2,
137         [7] = GAUDI_QUEUE_ID_DMA_1_3,
138 };
139
140 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
141         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
142         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
143         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
144         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
145         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
146         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
147         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
148         [PACKET_FENCE]          = sizeof(struct packet_fence),
149         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
150         [PACKET_NOP]            = sizeof(struct packet_nop),
151         [PACKET_STOP]           = sizeof(struct packet_stop),
152         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
153         [PACKET_WAIT]           = sizeof(struct packet_wait),
154         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
155 };
156
157 static inline bool validate_packet_id(enum packet_id id)
158 {
159         switch (id) {
160         case PACKET_WREG_32:
161         case PACKET_WREG_BULK:
162         case PACKET_MSG_LONG:
163         case PACKET_MSG_SHORT:
164         case PACKET_CP_DMA:
165         case PACKET_REPEAT:
166         case PACKET_MSG_PROT:
167         case PACKET_FENCE:
168         case PACKET_LIN_DMA:
169         case PACKET_NOP:
170         case PACKET_STOP:
171         case PACKET_ARB_POINT:
172         case PACKET_WAIT:
173         case PACKET_LOAD_AND_EXE:
174                 return true;
175         default:
176                 return false;
177         }
178 }
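
Taken together, gaudi_packet_sizes[] and validate_packet_id() are what let the command-buffer parser walk a CB packet by packet. The sketch below is only an illustration of that walk, not the driver's actual parser (which also validates per-opcode fields and variable-length payloads); it assumes the 64-bit packet header in struct gaudi_packet and the PACKET_HEADER_PACKET_ID_MASK/SHIFT macros from the Gaudi packet definitions, and the function name is made up for the example.

static int example_walk_cb(struct hl_device *hdev, void *cb_kernel_address,
				u32 cb_size)
{
	u32 parsed_len = 0;

	while (parsed_len < cb_size) {
		struct gaudi_packet *pkt = cb_kernel_address + parsed_len;
		enum packet_id pkt_id;
		u16 pkt_size;

		/* the opcode lives in the upper bits of the 64-bit header */
		pkt_id = (enum packet_id) ((le64_to_cpu(pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
				PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %d\n", pkt_id);
			return -EINVAL;
		}

		pkt_size = gaudi_packet_sizes[pkt_id];
		if (!pkt_size || parsed_len + pkt_size > cb_size)
			return -EINVAL;

		/* variable-length packets (e.g. WREG_BULK) carry extra payload
		 * that the real parser accounts for; this sketch does not
		 */
		parsed_len += pkt_size;
	}

	return 0;
}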
179
180 static const char * const
181 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
182         "tpc_address_exceed_slm",
183         "tpc_div_by_0",
184         "tpc_spu_mac_overflow",
185         "tpc_spu_addsub_overflow",
186         "tpc_spu_abs_overflow",
187         "tpc_spu_fp_dst_nan_inf",
188         "tpc_spu_fp_dst_denorm",
189         "tpc_vpu_mac_overflow",
190         "tpc_vpu_addsub_overflow",
191         "tpc_vpu_abs_overflow",
192         "tpc_vpu_fp_dst_nan_inf",
193         "tpc_vpu_fp_dst_denorm",
194         "tpc_assertions",
195         "tpc_illegal_instruction",
196         "tpc_pc_wrap_around",
197         "tpc_qm_sw_err",
198         "tpc_hbw_rresp_err",
199         "tpc_hbw_bresp_err",
200         "tpc_lbw_rresp_err",
201         "tpc_lbw_bresp_err"
202 };
203
204 static const char * const
205 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
206         "PQ AXI HBW error",
207         "CQ AXI HBW error",
208         "CP AXI HBW error",
209         "CP error due to undefined OPCODE",
210         "CP encountered STOP OPCODE",
211         "CP AXI LBW error",
212         "CP WRREG32 or WRBULK returned error",
213         "N/A",
214         "FENCE 0 inc over max value and clipped",
215         "FENCE 1 inc over max value and clipped",
216         "FENCE 2 inc over max value and clipped",
217         "FENCE 3 inc over max value and clipped",
218         "FENCE 0 dec under min value and clipped",
219         "FENCE 1 dec under min value and clipped",
220         "FENCE 2 dec under min value and clipped",
221         "FENCE 3 dec under min value and clipped"
222 };
223
224 static const char * const
225 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
226         "Choice push while full error",
227         "Choice Q watchdog error",
228         "MSG AXI LBW returned with error"
229 };
230
231 enum gaudi_sm_sei_cause {
232         GAUDI_SM_SEI_SO_OVERFLOW,
233         GAUDI_SM_SEI_LBW_4B_UNALIGNED,
234         GAUDI_SM_SEI_AXI_RESPONSE_ERR
235 };
236
237 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
238         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
239         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
240         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
241         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
242         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
243         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
244         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
245         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
246         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
247         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
248         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
249         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
250         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
251         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
252         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
253         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
254         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
255         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
256         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
257         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
258         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
259         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
304         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
305         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
306         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
307         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
308         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
309         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
310         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
311         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
312         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
313         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
314         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
315         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
316         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
317         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
318         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
319         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
320         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
321         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
322         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
323         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
324         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
325         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
326         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
327         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
328         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
329         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
330         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
331         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
332         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
333         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
334         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
335         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
336         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
337         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
338         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
339         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
340         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
341         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
342         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
343         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
344         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
345         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
346         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
347         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
348         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
349         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
350         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
351 };
352
353 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
354         { .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
355         { .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
356         { .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
357         { .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
358         { .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
359         { .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
360         { .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
361         { .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
362         { .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
363         { .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
364         { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
365         { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
366         { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
367         { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
368         { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
369         { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
370         { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
371         { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
372         { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
373         { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
374         { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
375         { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
376         { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
377         { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
378         { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
379         { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
380         { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
381 };
382
383 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
384         { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
385         { .id = 201, .name = "MON_OBJ_DMA_UP_FEADBACK_RESET" },
386         { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
387         { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
388         { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
389         { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
390         { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
391         { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
392         { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
393         { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
394         { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
395 };
396
397 static s64 gaudi_state_dump_specs_props[] = {
398         [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
399         [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
400         [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
401         [SP_MON_OBJ_WR_ADDR_LOW] =
402                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
403         [SP_MON_OBJ_WR_ADDR_HIGH] =
404                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
405         [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
406         [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
407         [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
408         [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
409         [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
410         [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
411         [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
412         [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
413         [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
414         [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
415         [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
416         [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
417         [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
418         [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
419         [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
420         [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
421         [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
422         [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
423         [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
424         [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
425         [SP_FENCE0_CNT_OFFSET] =
426                 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
427         [SP_FENCE0_RDATA_OFFSET] =
428                 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
429         [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
430         [SP_NUM_CORES] = 1,
431 };
432
433 static const char * const gaudi_sync_manager_names[] = {
434         "SYNC_MGR_E_N", "SYNC_MGR_W_N", "SYNC_MGR_E_S", "SYNC_MGR_W_S",
435         NULL
436 };
437
438 struct ecc_info_extract_params {
439         u64 block_address;
440         u32 num_memories;
441         bool derr;
442         bool disable_clock_gating;
443 };
444
445 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
446                                                                 u64 phys_addr);
447 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
448                                         struct hl_cs_job *job);
449 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
450                                         u32 size, u64 val);
451 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
452                                         u32 num_regs, u32 val);
453 static int gaudi_schedule_register_memset(struct hl_device *hdev,
454                 u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
455 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
456                                 u32 tpc_id);
457 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
458 static int gaudi_cpucp_info_get(struct hl_device *hdev);
459 static void gaudi_disable_clock_gating(struct hl_device *hdev);
460 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
461 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
462                                 u32 size, bool eb);
463 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
464                                 struct hl_gen_wait_properties *prop);
465
466 static inline enum hl_collective_mode
467 get_collective_mode(struct hl_device *hdev, u32 queue_id)
468 {
469         if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
470                 return HL_COLLECTIVE_MASTER;
471
472         if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
473                         queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
474                 return HL_COLLECTIVE_SLAVE;
475
476         if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
477                         queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
478                 return HL_COLLECTIVE_SLAVE;
479
480         if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
481                         queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
482                 return HL_COLLECTIVE_SLAVE;
483
484         return HL_COLLECTIVE_NOT_SUPPORTED;
485 }
486
487 static inline void set_default_power_values(struct hl_device *hdev)
488 {
489         struct asic_fixed_properties *prop = &hdev->asic_prop;
490
491         if (hdev->card_type == cpucp_card_type_pmc) {
492                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
493                 prop->dc_power_default = DC_POWER_DEFAULT_PMC;
494         } else {
495                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
496                 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
497         }
498 }
499
500 static int gaudi_set_fixed_properties(struct hl_device *hdev)
501 {
502         struct asic_fixed_properties *prop = &hdev->asic_prop;
503         u32 num_sync_stream_queues = 0;
504         int i;
505
506         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
507         prop->hw_queues_props = kcalloc(prop->max_queues,
508                         sizeof(struct hw_queue_properties),
509                         GFP_KERNEL);
510
511         if (!prop->hw_queues_props)
512                 return -ENOMEM;
513
514         for (i = 0 ; i < prop->max_queues ; i++) {
515                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
516                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
517                         prop->hw_queues_props[i].driver_only = 0;
518                         prop->hw_queues_props[i].supports_sync_stream = 1;
519                         prop->hw_queues_props[i].cb_alloc_flags =
520                                 CB_ALLOC_KERNEL;
521                         num_sync_stream_queues++;
522                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
523                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
524                         prop->hw_queues_props[i].driver_only = 1;
525                         prop->hw_queues_props[i].supports_sync_stream = 0;
526                         prop->hw_queues_props[i].cb_alloc_flags =
527                                 CB_ALLOC_KERNEL;
528                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
529                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
530                         prop->hw_queues_props[i].driver_only = 0;
531                         prop->hw_queues_props[i].supports_sync_stream = 0;
532                         prop->hw_queues_props[i].cb_alloc_flags =
533                                 CB_ALLOC_USER;
534
535                 }
536                 prop->hw_queues_props[i].collective_mode =
537                                                 get_collective_mode(hdev, i);
538         }
539
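        /* Per the commit subject at the top of this page, this new property
         * exposes the offset that common code must add to a host DMA address
         * so the device's engines and MMU see it inside the host address
         * window, instead of hard-coding HOST_PHYS_BASE (illustrative:
         * device-visible address = dma_addr + device_dma_offset_for_host_access).
         */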
540         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
541         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
542         prop->collective_first_sob = 0;
543         prop->collective_first_mon = 0;
544
545         /* 2 SOBs per internal queue stream are reserved for collective */
546         prop->sync_stream_first_sob =
547                         ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
548                         * QMAN_STREAMS * HL_RSVD_SOBS;
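        /* Worked illustration only (the real constants live in the Gaudi
         * headers): if NUMBER_OF_SOBS_IN_GRP were 11 and
         * HL_MAX_SOBS_PER_MONITOR were 8, ALIGN() would round each group up
         * to 16 SOBs, and with QMAN_STREAMS == 4 and HL_RSVD_SOBS == 2 the
         * collective logic would reserve 16 * 4 * 2 = 128 SOBs below
         * sync_stream_first_sob.
         */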
549
550         /* 1 monitor per internal queue stream is reserved for collective
551          * 2 monitors per external queue stream are reserved for collective
552          */
553         prop->sync_stream_first_mon =
554                         (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
555                         (NUMBER_OF_EXT_HW_QUEUES * 2);
556
557         prop->dram_base_address = DRAM_PHYS_BASE;
558         prop->dram_size = GAUDI_HBM_SIZE_32GB;
559         prop->dram_end_address = prop->dram_base_address +
560                                         prop->dram_size;
561         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
562
563         prop->sram_base_address = SRAM_BASE_ADDR;
564         prop->sram_size = SRAM_SIZE;
565         prop->sram_end_address = prop->sram_base_address +
566                                         prop->sram_size;
567         prop->sram_user_base_address = prop->sram_base_address +
568                                         SRAM_USER_BASE_OFFSET;
569
570         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
571         if (hdev->pldm)
572                 prop->mmu_pgt_size = 0x800000; /* 8MB */
573         else
574                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
575         prop->mmu_pte_size = HL_PTE_SIZE;
576         prop->mmu_hop_table_size = HOP_TABLE_SIZE;
577         prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
578         prop->dram_page_size = PAGE_SIZE_2MB;
579         prop->dram_supports_virtual_memory = false;
580
581         prop->pmmu.hop0_shift = HOP0_SHIFT;
582         prop->pmmu.hop1_shift = HOP1_SHIFT;
583         prop->pmmu.hop2_shift = HOP2_SHIFT;
584         prop->pmmu.hop3_shift = HOP3_SHIFT;
585         prop->pmmu.hop4_shift = HOP4_SHIFT;
586         prop->pmmu.hop0_mask = HOP0_MASK;
587         prop->pmmu.hop1_mask = HOP1_MASK;
588         prop->pmmu.hop2_mask = HOP2_MASK;
589         prop->pmmu.hop3_mask = HOP3_MASK;
590         prop->pmmu.hop4_mask = HOP4_MASK;
591         prop->pmmu.start_addr = VA_HOST_SPACE_START;
592         prop->pmmu.end_addr =
593                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
594         prop->pmmu.page_size = PAGE_SIZE_4KB;
595         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
596
597         /* PMMU and HPMMU are the same except for the page size */
598         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
599         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
600
601         /* shifts and masks are the same in PMMU and DMMU */
602         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
603         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
604         prop->dmmu.end_addr = VA_HOST_SPACE_END;
605         prop->dmmu.page_size = PAGE_SIZE_2MB;
606
607         prop->cfg_size = CFG_SIZE;
608         prop->max_asid = MAX_ASID;
609         prop->num_of_events = GAUDI_EVENT_SIZE;
610         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
611
612         set_default_power_values(hdev);
613
614         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
615         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
616
617         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
618         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
619
620         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
621                                         CARD_NAME_MAX_LEN);
622
623         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
624
625         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
626                         prop->sync_stream_first_sob +
627                         (num_sync_stream_queues * HL_RSVD_SOBS);
628         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
629                         prop->sync_stream_first_mon +
630                         (num_sync_stream_queues * HL_RSVD_MONS);
631
632         prop->first_available_user_msix_interrupt = USHRT_MAX;
633
634         for (i = 0 ; i < HL_MAX_DCORES ; i++)
635                 prop->first_available_cq[i] = USHRT_MAX;
636
637         prop->fw_cpu_boot_dev_sts0_valid = false;
638         prop->fw_cpu_boot_dev_sts1_valid = false;
639         prop->hard_reset_done_by_fw = false;
640         prop->gic_interrupts_enable = true;
641
642         return 0;
643 }
644
645 static int gaudi_pci_bars_map(struct hl_device *hdev)
646 {
647         static const char * const name[] = {"SRAM", "CFG", "HBM"};
648         bool is_wc[3] = {false, false, true};
649         int rc;
650
651         rc = hl_pci_bars_map(hdev, name, is_wc);
652         if (rc)
653                 return rc;
654
655         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
656                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
657
658         return 0;
659 }
660
661 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
662 {
663         struct gaudi_device *gaudi = hdev->asic_specific;
664         struct hl_inbound_pci_region pci_region;
665         u64 old_addr = addr;
666         int rc;
667
668         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
669                 return old_addr;
670
671         if (hdev->asic_prop.iatu_done_by_fw)
672                 return U64_MAX;
673
674         /* Inbound Region 2 - Bar 4 - Point to HBM */
675         pci_region.mode = PCI_BAR_MATCH_MODE;
676         pci_region.bar = HBM_BAR_ID;
677         pci_region.addr = addr;
678         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
679         if (rc)
680                 return U64_MAX;
681
682         if (gaudi) {
683                 old_addr = gaudi->hbm_bar_cur_addr;
684                 gaudi->hbm_bar_cur_addr = addr;
685         }
686
687         return old_addr;
688 }
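
Because the HBM BAR is only a window into the 32 GB of device memory, CPU-side accesses to an arbitrary DRAM address first slide the window with this helper and restore it afterwards. A minimal sketch of that pattern, modeled on the driver's debugfs access path (error handling and locking omitted; the function name is hypothetical):

static int example_read_hbm_word(struct hl_device *hdev, u64 addr, u32 *val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 bar_base, old_base;

	/* align the window base to the BAR size so addr falls inside it */
	bar_base = addr & ~(prop->dram_pci_bar_size - 1ull);

	old_base = gaudi_set_hbm_bar_base(hdev, bar_base);
	if (old_base == U64_MAX)
		return -EIO;

	*val = readl(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base));

	/* restore the previous window for any concurrent users */
	gaudi_set_hbm_bar_base(hdev, old_base);

	return 0;
}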
689
690 static int gaudi_init_iatu(struct hl_device *hdev)
691 {
692         struct hl_inbound_pci_region inbound_region;
693         struct hl_outbound_pci_region outbound_region;
694         int rc;
695
696         if (hdev->asic_prop.iatu_done_by_fw)
697                 return 0;
698
699         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
700         inbound_region.mode = PCI_BAR_MATCH_MODE;
701         inbound_region.bar = SRAM_BAR_ID;
702         inbound_region.addr = SRAM_BASE_ADDR;
703         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
704         if (rc)
705                 goto done;
706
707         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
708         inbound_region.mode = PCI_BAR_MATCH_MODE;
709         inbound_region.bar = CFG_BAR_ID;
710         inbound_region.addr = SPI_FLASH_BASE_ADDR;
711         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
712         if (rc)
713                 goto done;
714
715         /* Inbound Region 2 - Bar 4 - Point to HBM */
716         inbound_region.mode = PCI_BAR_MATCH_MODE;
717         inbound_region.bar = HBM_BAR_ID;
718         inbound_region.addr = DRAM_PHYS_BASE;
719         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
720         if (rc)
721                 goto done;
722
723         hdev->asic_funcs->set_dma_mask_from_fw(hdev);
724
725         /* Outbound Region 0 - Point to Host */
726         outbound_region.addr = HOST_PHYS_BASE;
727         outbound_region.size = HOST_PHYS_SIZE;
728         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
729
730 done:
731         return rc;
732 }
733
734 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
735 {
736         return RREG32(mmHW_STATE);
737 }
738
739 static int gaudi_early_init(struct hl_device *hdev)
740 {
741         struct asic_fixed_properties *prop = &hdev->asic_prop;
742         struct pci_dev *pdev = hdev->pdev;
743         u32 fw_boot_status;
744         int rc;
745
746         rc = gaudi_set_fixed_properties(hdev);
747         if (rc) {
748                 dev_err(hdev->dev, "Failed setting fixed properties\n");
749                 return rc;
750         }
751
752         /* Check BAR sizes */
753         if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
754                 dev_err(hdev->dev,
755                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
756                         SRAM_BAR_ID,
757                         (unsigned long long) pci_resource_len(pdev,
758                                                         SRAM_BAR_ID),
759                         SRAM_BAR_SIZE);
760                 rc = -ENODEV;
761                 goto free_queue_props;
762         }
763
764         if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
765                 dev_err(hdev->dev,
766                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
767                         CFG_BAR_ID,
768                         (unsigned long long) pci_resource_len(pdev,
769                                                                 CFG_BAR_ID),
770                         CFG_BAR_SIZE);
771                 rc = -ENODEV;
772                 goto free_queue_props;
773         }
774
775         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
776
777         /* If FW security is enabled at this point it means no access to ELBI */
778         if (hdev->asic_prop.fw_security_enabled) {
779                 hdev->asic_prop.iatu_done_by_fw = true;
780
781                 /*
782                  * The GIC-security-bit can ONLY be set by CPUCP, so at this
783                  * stage the decision can only be taken based on PCI ID security.
784                  */
785                 hdev->asic_prop.gic_interrupts_enable = false;
786                 goto pci_init;
787         }
788
789         rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
790                                 &fw_boot_status);
791         if (rc)
792                 goto free_queue_props;
793
794         /* Check whether FW is configuring iATU */
795         if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
796                         (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
797                 hdev->asic_prop.iatu_done_by_fw = true;
798
799 pci_init:
800         rc = hl_pci_init(hdev);
801         if (rc)
802                 goto free_queue_props;
803
804         /* Before continuing with the initialization, we need to read the preboot
805          * version to determine whether we are running with security-enabled firmware
806          */
807         rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
808                                         mmCPU_BOOT_DEV_STS0,
809                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
810                                         mmCPU_BOOT_ERR1,
811                                         GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
812         if (rc) {
813                 if (hdev->reset_on_preboot_fail)
814                         hdev->asic_funcs->hw_fini(hdev, true);
815                 goto pci_fini;
816         }
817
818         if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
819                 dev_info(hdev->dev,
820                         "H/W state is dirty, must reset before initializing\n");
821                 hdev->asic_funcs->hw_fini(hdev, true);
822         }
823
824         return 0;
825
826 pci_fini:
827         hl_pci_fini(hdev);
828 free_queue_props:
829         kfree(hdev->asic_prop.hw_queues_props);
830         return rc;
831 }
832
833 static int gaudi_early_fini(struct hl_device *hdev)
834 {
835         kfree(hdev->asic_prop.hw_queues_props);
836         hl_pci_fini(hdev);
837
838         return 0;
839 }
840
841 /**
842  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
843  *
844  * @hdev: pointer to hl_device structure
845  * Return: 0 for success, negative value for error.
846  */
847 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
848 {
849         struct asic_fixed_properties *prop = &hdev->asic_prop;
850         u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
851         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
852         int rc;
853
854         if (hdev->asic_prop.fw_security_enabled) {
855                 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
856
857                 if (rc)
858                         return rc;
859
860                 freq = pll_freq_arr[2];
861         } else {
862                 /* Backward compatibility */
863                 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
864                 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
865                 nr = RREG32(mmPSOC_CPU_PLL_NR);
866                 nf = RREG32(mmPSOC_CPU_PLL_NF);
867                 od = RREG32(mmPSOC_CPU_PLL_OD);
868
869                 if (div_sel == DIV_SEL_REF_CLK ||
870                                 div_sel == DIV_SEL_DIVIDED_REF) {
871                         if (div_sel == DIV_SEL_REF_CLK)
872                                 freq = PLL_REF_CLK;
873                         else
874                                 freq = PLL_REF_CLK / (div_fctr + 1);
875                 } else if (div_sel == DIV_SEL_PLL_CLK ||
876                         div_sel == DIV_SEL_DIVIDED_PLL) {
877                         pll_clk = PLL_REF_CLK * (nf + 1) /
878                                         ((nr + 1) * (od + 1));
879                         if (div_sel == DIV_SEL_PLL_CLK)
880                                 freq = pll_clk;
881                         else
882                                 freq = pll_clk / (div_fctr + 1);
883                 } else {
884                         dev_warn(hdev->dev,
885                                 "Received invalid div select value: %d",
886                                 div_sel);
887                         freq = 0;
888                 }
889         }
890
891         prop->psoc_timestamp_frequency = freq;
892         prop->psoc_pci_pll_nr = nr;
893         prop->psoc_pci_pll_nf = nf;
894         prop->psoc_pci_pll_od = od;
895         prop->psoc_pci_pll_div_factor = div_fctr;
896
897         return 0;
898 }
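
As a worked illustration of the legacy path above (numbers for illustration only): assuming a 50 MHz PLL reference clock, nf = 79, nr = 1 and od = 1 give pll_clk = 50 MHz * 80 / (2 * 2) = 1000 MHz; with div_sel == DIV_SEL_PLL_CLK the timestamp frequency is then 1000 MHz, while div_sel == DIV_SEL_DIVIDED_PLL and div_fctr = 3 would yield 1000 / 4 = 250 MHz.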
899
900 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
901                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
902 {
903         struct asic_fixed_properties *prop = &hdev->asic_prop;
904         struct packet_lin_dma *init_tpc_mem_pkt;
905         struct hl_cs_job *job;
906         struct hl_cb *cb;
907         u64 dst_addr;
908         u32 cb_size, ctl;
909         u8 tpc_id;
910         int rc;
911
912         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
913         if (!cb)
914                 return -EFAULT;
915
916         init_tpc_mem_pkt = cb->kernel_address;
917         cb_size = sizeof(*init_tpc_mem_pkt);
918         memset(init_tpc_mem_pkt, 0, cb_size);
919
920         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
921
922         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
923         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
924         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
925         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
926
927         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
928
929         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
930         dst_addr = (prop->sram_user_base_address &
931                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
932                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
933         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
934
935         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
936         if (!job) {
937                 dev_err(hdev->dev, "Failed to allocate a new job\n");
938                 rc = -ENOMEM;
939                 goto release_cb;
940         }
941
942         job->id = 0;
943         job->user_cb = cb;
944         atomic_inc(&job->user_cb->cs_cnt);
945         job->user_cb_size = cb_size;
946         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
947         job->patched_cb = job->user_cb;
948         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
949
950         hl_debugfs_add_job(hdev, job);
951
952         rc = gaudi_send_job_on_qman0(hdev, job);
953
954         if (rc)
955                 goto free_job;
956
957         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
958                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
959                 if (rc)
960                         break;
961         }
962
963 free_job:
964         hl_userptr_delete_list(hdev, &job->userptr_list);
965         hl_debugfs_remove_job(hdev, job);
966         kfree(job);
967         atomic_dec(&cb->cs_cnt);
968
969 release_cb:
970         hl_cb_put(cb);
971         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
972
973         return rc;
974 }
975
976 /*
977  * gaudi_init_tpc_mem() - Initialize TPC memories.
978  * @hdev: Pointer to hl_device structure.
979  *
980  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
981  *
982  * Return: 0 for success, negative value for error.
983  */
984 static int gaudi_init_tpc_mem(struct hl_device *hdev)
985 {
986         const struct firmware *fw;
987         size_t fw_size;
988         void *cpu_addr;
989         dma_addr_t dma_handle;
990         int rc, count = 5;
991
992 again:
993         rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
994         if (rc == -EINTR && count-- > 0) {
995                 msleep(50);
996                 goto again;
997         }
998
999         if (rc) {
1000                 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1001                                 GAUDI_TPC_FW_FILE);
1002                 goto out;
1003         }
1004
1005         fw_size = fw->size;
1006         cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
1007                         &dma_handle, GFP_KERNEL | __GFP_ZERO);
1008         if (!cpu_addr) {
1009                 dev_err(hdev->dev,
1010                         "Failed to allocate %zu of dma memory for TPC kernel\n",
1011                         fw_size);
1012                 rc = -ENOMEM;
1013                 goto out;
1014         }
1015
1016         memcpy(cpu_addr, fw->data, fw_size);
1017
1018         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1019
1020         hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
1021                         dma_handle);
1022
1023 out:
1024         release_firmware(fw);
1025         return rc;
1026 }
1027
1028 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1029 {
1030         struct gaudi_device *gaudi = hdev->asic_specific;
1031         struct gaudi_collective_properties *prop = &gaudi->collective_props;
1032         struct hl_hw_queue *q;
1033         u32 i, sob_id, sob_group_id, queue_id;
1034
1035         /* Iterate through SOB groups and assign a SOB for each slave queue */
1036         sob_group_id =
1037                 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1038         sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1039
1040         queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1041         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1042                 q = &hdev->kernel_queues[queue_id + (4 * i)];
1043                 q->sync_stream_prop.collective_sob_id = sob_id + i;
1044         }
1045
1046         /* Both DMA5 and TPC7 use the same resources since only a single
1047          * engine needs to participate in the reduction process
1048          */
1049         queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1050         q = &hdev->kernel_queues[queue_id];
1051         q->sync_stream_prop.collective_sob_id =
1052                         sob_id + NIC_NUMBER_OF_ENGINES;
1053
1054         queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1055         q = &hdev->kernel_queues[queue_id];
1056         q->sync_stream_prop.collective_sob_id =
1057                         sob_id + NIC_NUMBER_OF_ENGINES;
1058 }
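
To make the indexing above concrete (assuming HL_RSVD_SOBS == 2, as the wraparound code later in this file implies): stream 1 with curr_sob_group_idx[1] == 1 selects SOB group 1 * 2 + 1 = 3, and each NIC queue of that stream (spaced four queue IDs apart, one per stream) gets base_sob_id + i, with the DMA5 and TPC7 queues sharing the SOB right after the NIC range.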
1059
1060 static void gaudi_sob_group_hw_reset(struct kref *ref)
1061 {
1062         struct gaudi_hw_sob_group *hw_sob_group =
1063                 container_of(ref, struct gaudi_hw_sob_group, kref);
1064         struct hl_device *hdev = hw_sob_group->hdev;
1065         u64 base_addr;
1066         int rc;
1067
1068         base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1069                         hw_sob_group->base_sob_id * 4;
1070         rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
1071                         base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
1072         if (rc)
1073                 dev_err(hdev->dev,
1074                         "failed resetting sob group - sob base %u, count %u",
1075                         hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);
1076
1077         kref_init(&hw_sob_group->kref);
1078 }
1079
1080 static void gaudi_sob_group_reset_error(struct kref *ref)
1081 {
1082         struct gaudi_hw_sob_group *hw_sob_group =
1083                 container_of(ref, struct gaudi_hw_sob_group, kref);
1084         struct hl_device *hdev = hw_sob_group->hdev;
1085
1086         dev_crit(hdev->dev,
1087                 "SOB release shouldn't be called here, base_sob_id: %d\n",
1088                 hw_sob_group->base_sob_id);
1089 }
1090
1091 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1092 {
1093         struct gaudi_collective_properties *prop;
1094         int i;
1095
1096         prop = &gaudi->collective_props;
1097
1098         memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1099
1100         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1101                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1102                         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1103                                         BIT(i % HL_MAX_SOBS_PER_MONITOR);
1104         /* Set collective engine bit */
1105         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1106                                 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1107 }
1108
1109 static int gaudi_collective_init(struct hl_device *hdev)
1110 {
1111         u32 i, sob_id, reserved_sobs_per_group;
1112         struct gaudi_collective_properties *prop;
1113         struct gaudi_device *gaudi;
1114
1115         gaudi = hdev->asic_specific;
1116         prop = &gaudi->collective_props;
1117         sob_id = hdev->asic_prop.collective_first_sob;
1118
1119         /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1120         reserved_sobs_per_group =
1121                 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1122
1123         /* Init SOB groups */
1124         for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1125                 prop->hw_sob_group[i].hdev = hdev;
1126                 prop->hw_sob_group[i].base_sob_id = sob_id;
1127                 sob_id += reserved_sobs_per_group;
1128                 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1129         }
1130
1131         for (i = 0 ; i < QMAN_STREAMS; i++) {
1132                 prop->next_sob_group_val[i] = 1;
1133                 prop->curr_sob_group_idx[i] = 0;
1134                 gaudi_collective_map_sobs(hdev, i);
1135         }
1136
1137         gaudi_collective_mstr_sob_mask_set(gaudi);
1138
1139         return 0;
1140 }
1141
1142 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1143 {
1144         struct gaudi_device *gaudi = hdev->asic_specific;
1145         struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1146
1147         kref_put(&cprop->hw_sob_group[sob_group].kref,
1148                                         gaudi_sob_group_hw_reset);
1149 }
1150
1151 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1152                 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1153 {
1154         u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1155         struct gaudi_collective_properties *cprop;
1156         struct hl_gen_wait_properties wait_prop;
1157         struct hl_sync_stream_properties *prop;
1158         struct gaudi_device *gaudi;
1159
1160         gaudi = hdev->asic_specific;
1161         cprop = &gaudi->collective_props;
1162         queue_id = job->hw_queue_id;
1163         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1164
1165         master_sob_base =
1166                 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1167         master_monitor = prop->collective_mstr_mon_id[0];
1168
1169         cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1170
1171         dev_dbg(hdev->dev,
1172                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1173                 master_sob_base, cprop->mstr_sob_mask[0],
1174                 cprop->next_sob_group_val[stream],
1175                 master_monitor, queue_id);
1176
1177         wait_prop.data = (void *) job->patched_cb;
1178         wait_prop.sob_base = master_sob_base;
1179         wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1180         wait_prop.sob_val = cprop->next_sob_group_val[stream];
1181         wait_prop.mon_id = master_monitor;
1182         wait_prop.q_idx = queue_id;
1183         wait_prop.size = cb_size;
1184         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1185
1186         master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1187         master_monitor = prop->collective_mstr_mon_id[1];
1188
1189         dev_dbg(hdev->dev,
1190                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1191                 master_sob_base, cprop->mstr_sob_mask[1],
1192                 cprop->next_sob_group_val[stream],
1193                 master_monitor, queue_id);
1194
1195         wait_prop.sob_base = master_sob_base;
1196         wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1197         wait_prop.mon_id = master_monitor;
1198         wait_prop.size = cb_size;
1199         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1200 }
1201
1202 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1203                 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1204 {
1205         struct hl_gen_wait_properties wait_prop;
1206         struct hl_sync_stream_properties *prop;
1207         u32 queue_id, cb_size = 0;
1208
1209         queue_id = job->hw_queue_id;
1210         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1211
1212         /* Add to wait CBs using slave monitor */
1213         wait_prop.data = (void *) job->user_cb;
1214         wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1215         wait_prop.sob_mask = 0x1;
1216         wait_prop.sob_val = cs_cmpl->sob_val;
1217         wait_prop.mon_id = prop->collective_slave_mon_id;
1218         wait_prop.q_idx = queue_id;
1219         wait_prop.size = cb_size;
1220
1221         dev_dbg(hdev->dev,
1222                 "Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
1223                 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1224                 prop->collective_slave_mon_id, queue_id);
1225
1226         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1227
1228         dev_dbg(hdev->dev,
1229                 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1230                 prop->collective_sob_id, queue_id);
1231
1232         cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1233                         prop->collective_sob_id, cb_size, false);
1234 }
1235
1236 static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
1237 {
1238         struct hl_cs_compl *signal_cs_cmpl =
1239                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1240         struct hl_cs_compl *cs_cmpl =
1241                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1242         struct gaudi_collective_properties *cprop;
1243         u32 stream, queue_id, sob_group_offset;
1244         struct gaudi_device *gaudi;
1245         struct hl_device *hdev;
1246         struct hl_cs_job *job;
1247         struct hl_ctx *ctx;
1248
1249         ctx = cs->ctx;
1250         hdev = ctx->hdev;
1251         gaudi = hdev->asic_specific;
1252         cprop = &gaudi->collective_props;
1253
1254         /* copy the SOB id and value of the signal CS */
1255         cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1256         cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1257
1258         /* Calculate the stream from collective master queue (1st job) */
1259         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1260         stream = job->hw_queue_id % 4;
1261         sob_group_offset =
1262                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1263
1264         list_for_each_entry(job, &cs->job_list, cs_node) {
1265                 queue_id = job->hw_queue_id;
1266
1267                 if (hdev->kernel_queues[queue_id].collective_mode ==
1268                                 HL_COLLECTIVE_MASTER)
1269                         gaudi_collective_master_init_job(hdev, job, stream,
1270                                                 sob_group_offset);
1271                 else
1272                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1273         }
1274
1275         cs_cmpl->sob_group = sob_group_offset;
1276
1277         /* Handle sob group kref and wraparound */
1278         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1279         cprop->next_sob_group_val[stream]++;
1280
1281         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1282                 /*
1283                  * Decrement as we reached the max value.
1284                  * The release function won't be called here as we've
1285                  * just incremented the refcount.
1286                  */
1287                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1288                                 gaudi_sob_group_reset_error);
1289                 cprop->next_sob_group_val[stream] = 1;
1290                 /* only two SOB groups are currently in use per stream */
1291                 cprop->curr_sob_group_idx[stream] =
1292                         (cprop->curr_sob_group_idx[stream] + 1) &
1293                                                         (HL_RSVD_SOBS - 1);
1294
1295                 gaudi_collective_map_sobs(hdev, stream);
1296
1297                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1298                                 cprop->curr_sob_group_idx[stream], stream);
1299         }
1300
1301         /* Increment kref since all slave queues are now waiting on it */
1302         kref_get(&cs_cmpl->hw_sob->kref);
1303         /*
1304          * Must put the signal fence after the SOB refcnt increment so
1305          * the SOB refcnt won't turn 0 and reset the SOB before the
1306          * wait CS was submitted.
1307          */
1308         mb();
1309         hl_fence_put(cs->signal_fence);
1310         cs->signal_fence = NULL;
1311 }
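
A short note on the wraparound handling above: because HL_RSVD_SOBS is a power of two, the update (curr_sob_group_idx + 1) & (HL_RSVD_SOBS - 1) simply cycles through the reserved groups; with two groups per stream (as the in-code comment states) the index toggles 0 -> 1 -> 0, so each time next_sob_group_val reaches HL_MAX_SOB_VAL the stream switches to the alternate SOB group and restarts counting from 1.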
1312
1313 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1314                 struct hl_ctx *ctx, struct hl_cs *cs,
1315                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
1316 {
1317         struct hw_queue_properties *hw_queue_prop;
1318         struct hl_cs_counters_atomic *cntr;
1319         struct hl_cs_job *job;
1320         struct hl_cb *cb;
1321         u32 cb_size;
1322         bool patched_cb;
1323
1324         cntr = &hdev->aggregated_cs_counters;
1325
1326         if (mode == HL_COLLECTIVE_MASTER) {
1327                 /* The CB of the collective master queue contains:
1328                  * 4 msg short packets for monitor 1 configuration
1329                  * 1 fence packet
1330                  * 4 msg short packets for monitor 2 configuration
1331                  * 1 fence packet
1332                  * 2 msg prot packets for completion and MSI-X
1333                  */
1334                 cb_size = sizeof(struct packet_msg_short) * 8 +
1335                                 sizeof(struct packet_fence) * 2 +
1336                                 sizeof(struct packet_msg_prot) * 2;
1337                 patched_cb = true;
1338         } else {
1339                 /* The CB of a collective slave queue contains:
1340                  * 4 msg short packets for monitor configuration
1341                  * 1 fence packet
1342                  * 1 additional msg short packet for sob signal
1343                  */
1344                 cb_size = sizeof(struct packet_msg_short) * 5 +
1345                                 sizeof(struct packet_fence);
1346                 patched_cb = false;
1347         }
1348
1349         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1350         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1351         if (!job) {
1352                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1353                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1354                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1355                 return -ENOMEM;
1356         }
1357
1358         /* Allocate an internal mapped CB for non-patched CBs */
1359         cb = hl_cb_kernel_create(hdev, cb_size,
1360                         hdev->mmu_enable && !patched_cb);
1361         if (!cb) {
1362                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1363                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1364                 kfree(job);
1365                 return -EFAULT;
1366         }
1367
1368         job->id = 0;
1369         job->cs = cs;
1370         job->user_cb = cb;
1371         atomic_inc(&job->user_cb->cs_cnt);
1372         job->user_cb_size = cb_size;
1373         job->hw_queue_id = queue_id;
1374
1375         /*
1376          * No need for parsing - the user CB is already the patched CB.
1377          * We call hl_cb_destroy() for two reasons: we don't need the CB
1378          * in the CB idr anymore, and we must decrement its refcount as
1379          * it was incremented inside hl_cb_kernel_create().
1380          */
1381         if (patched_cb)
1382                 job->patched_cb = job->user_cb;
1383         else
1384                 job->patched_cb = NULL;
1385
1386         job->job_cb_size = job->user_cb_size;
1387         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1388
1389         /* Increment refcount since we get a completion for external queues */
1390         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1391                 cs_get(cs);
1392
1393         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1394
1395         list_add_tail(&job->cs_node, &cs->job_list);
1396
1397         hl_debugfs_add_job(hdev, job);
1398
1399         return 0;
1400 }
1401
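/*
 * Create all the jobs of a collective wait CS: the first job goes to the
 * collective master queue (wait_queue_id) and the rest go to the collective
 * slave queues - the NIC queues of the relevant stream plus the reduction
 * engine queue (DMA5 or TPC7). NICs that are not initialized are skipped.
 */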
1402 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1403                 struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
1404                 u32 collective_engine_id)
1405 {
1406         struct gaudi_device *gaudi = hdev->asic_specific;
1407         struct hw_queue_properties *hw_queue_prop;
1408         u32 queue_id, collective_queue, num_jobs;
1409         u32 stream, nic_queue, nic_idx = 0;
1410         bool skip;
1411         int i, rc = 0;
1412
1413         /* Verify wait queue id is configured as master */
1414         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1415         if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
1416                 dev_err(hdev->dev,
1417                         "Queue %d is not configured as collective master\n",
1418                         wait_queue_id);
1419                 return -EINVAL;
1420         }
1421
1422         /* Verify engine id is supported */
1423         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1424                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1425                 dev_err(hdev->dev,
1426                         "Collective wait does not support engine %u\n",
1427                         collective_engine_id);
1428                 return -EINVAL;
1429         }
1430
1431         stream = wait_queue_id % 4;
1432
1433         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1434                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1435         else
1436                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1437
1438         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1439         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1440
1441         /* The first job goes to the collective master queue; it will wait
1442          * for the collective slave queues to finish execution.
1443          * The synchronization is done using two monitors:
1444          * the first monitor for NICs 0-7, the second monitor for NICs 8-9
1445          * and the reduction engine (DMA5/TPC7).
1446          *
1447          * The rest of the jobs go to the collective slave queues, which
1448          * will all wait for the user to signal sob 'cs_cmpl->sob_val'.
1449          */
1450         for (i = 0 ; i < num_jobs ; i++) {
1451                 if (i == 0) {
1452                         queue_id = wait_queue_id;
1453                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1454                                 HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
1455                 } else {
1456                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1457                                 if (gaudi->hw_cap_initialized &
1458                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1459                                         skip = false;
1460                                 else
1461                                         skip = true;
1462
1463                                 queue_id = nic_queue;
1464                                 nic_queue += 4;
1465                                 nic_idx++;
1466
1467                                 if (skip)
1468                                         continue;
1469                         } else {
1470                                 queue_id = collective_queue;
1471                         }
1472
1473                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1474                                 HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
1475                 }
1476
1477                 if (rc)
1478                         return rc;
1479         }
1480
1481         return rc;
1482 }
1483
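/*
 * Late initialization flow: fetch the cpucp info, disable the unused NIC
 * ports on PCI cards, enable PCI access from the device CPU, fetch the PSOC
 * frequency, clear the MMU page tables range, initialize the TPC memories
 * and initialize the collective properties.
 */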
1484 static int gaudi_late_init(struct hl_device *hdev)
1485 {
1486         struct gaudi_device *gaudi = hdev->asic_specific;
1487         int rc;
1488
1489         rc = gaudi->cpucp_info_get(hdev);
1490         if (rc) {
1491                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1492                 return rc;
1493         }
1494
1495         if ((hdev->card_type == cpucp_card_type_pci) &&
1496                         (hdev->nic_ports_mask & 0x3)) {
1497                 dev_info(hdev->dev,
1498                         "PCI card detected, only 8 ports are enabled\n");
1499                 hdev->nic_ports_mask &= ~0x3;
1500
1501                 /* Stop and disable unused NIC QMANs */
1502                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1503                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1504                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1505
1506                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1507                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1508                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1509
1510                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1511                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1512
1513                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1514         }
1515
1516         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1517         if (rc) {
1518                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1519                 return rc;
1520         }
1521
1522         rc = gaudi_fetch_psoc_frequency(hdev);
1523         if (rc) {
1524                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1525                 goto disable_pci_access;
1526         }
1527
1528         rc = gaudi_mmu_clear_pgt_range(hdev);
1529         if (rc) {
1530                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1531                 goto disable_pci_access;
1532         }
1533
1534         rc = gaudi_init_tpc_mem(hdev);
1535         if (rc) {
1536                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1537                 goto disable_pci_access;
1538         }
1539
1540         rc = gaudi_collective_init(hdev);
1541         if (rc) {
1542                 dev_err(hdev->dev, "Failed to init collective\n");
1543                 goto disable_pci_access;
1544         }
1545
1546         return 0;
1547
1548 disable_pci_access:
1549         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1550
1551         return rc;
1552 }
1553
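/* Free the hwmon channel info array pointed to by hl_chip_info */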
1554 static void gaudi_late_fini(struct hl_device *hdev)
1555 {
1556         const struct hwmon_channel_info **channel_info_arr;
1557         int i = 0;
1558
1559         if (!hdev->hl_chip_info->info)
1560                 return;
1561
1562         channel_info_arr = hdev->hl_chip_info->info;
1563
1564         while (channel_info_arr[i]) {
1565                 kfree(channel_info_arr[i]->config);
1566                 kfree(channel_info_arr[i]);
1567                 i++;
1568         }
1569
1570         kfree(channel_info_arr);
1571
1572         hdev->hl_chip_info->info = NULL;
1573 }
1574
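/*
 * Allocate the CPU accessible memory region on the host. Retry the
 * allocation until bits 49:39 of the start and end addresses are identical,
 * as explained in the comment below.
 */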
1575 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1576 {
1577         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1578         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1579         int i, j, rc = 0;
1580
1581         /*
1582          * The device CPU works with 40-bit addresses, and bit 39 must be set
1583          * to '1' when accessing the host.
1584          * Bits 49:39 of the full host address are saved for a later
1585          * configuration of the HW to perform the extension to 50 bits.
1586          * Because a single HW register holds the extension bits, these bits
1587          * must be identical across the entire allocated range.
1588          */
1589
1590         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1591                 virt_addr_arr[i] =
1592                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1593                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1594                                                 &dma_addr_arr[i],
1595                                                 GFP_KERNEL | __GFP_ZERO);
1596                 if (!virt_addr_arr[i]) {
1597                         rc = -ENOMEM;
1598                         goto free_dma_mem_arr;
1599                 }
1600
1601                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1602                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1603                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1604                         break;
1605         }
1606
1607         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1608                 dev_err(hdev->dev,
1609                         "MSBs of CPU accessible DMA memory are not identical across the allocated range\n",
1610                 rc = -EFAULT;
1611                 goto free_dma_mem_arr;
1612         }
1613
1614         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1615         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1616         hdev->cpu_pci_msb_addr =
1617                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1618
1619         if (!hdev->asic_prop.fw_security_enabled)
1620                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1621
1622 free_dma_mem_arr:
1623         for (j = 0 ; j < i ; j++)
1624                 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1625                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1626                                                 virt_addr_arr[j],
1627                                                 dma_addr_arr[j]);
1628
1629         return rc;
1630 }
1631
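/* Free the PQ buffers that were allocated for the internal QMANs */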
1632 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1633 {
1634         struct gaudi_device *gaudi = hdev->asic_specific;
1635         struct gaudi_internal_qman_info *q;
1636         u32 i;
1637
1638         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1639                 q = &gaudi->internal_qmans[i];
1640                 if (!q->pq_kernel_addr)
1641                         continue;
1642                 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1643                                                         q->pq_kernel_addr,
1644                                                         q->pq_dma_addr);
1645         }
1646 }
1647
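/*
 * Allocate a PQ buffer for every internal (on-device) queue. The PQ size
 * depends on the engine type: HBM DMA, MME, TPC or NIC.
 */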
1648 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1649 {
1650         struct gaudi_device *gaudi = hdev->asic_specific;
1651         struct gaudi_internal_qman_info *q;
1652         int rc, i;
1653
1654         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1655                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1656                         continue;
1657
1658                 q = &gaudi->internal_qmans[i];
1659
1660                 switch (i) {
1661                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1662                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1663                         break;
1664                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1665                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1666                         break;
1667                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1668                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1669                         break;
1670                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1671                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1672                         break;
1673                 default:
1674                         dev_err(hdev->dev, "Bad internal queue index %d\n", i);
1675                         rc = -EINVAL;
1676                         goto free_internal_qmans_pq_mem;
1677                 }
1678
1679                 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1680                                                 hdev, q->pq_size,
1681                                                 &q->pq_dma_addr,
1682                                                 GFP_KERNEL | __GFP_ZERO);
1683                 if (!q->pq_kernel_addr) {
1684                         rc = -ENOMEM;
1685                         goto free_internal_qmans_pq_mem;
1686                 }
1687         }
1688
1689         return 0;
1690
1691 free_internal_qmans_pq_mem:
1692         gaudi_free_internal_qmans_pq_mem(hdev);
1693         return rc;
1694 }
1695
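/*
 * Describe the PCI memory regions (CFG, SRAM, DRAM and SP SRAM): their base
 * address, size, offset in the BAR and the BAR that exposes them.
 */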
1696 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1697 {
1698         struct asic_fixed_properties *prop = &hdev->asic_prop;
1699         struct pci_mem_region *region;
1700
1701         /* CFG */
1702         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1703         region->region_base = CFG_BASE;
1704         region->region_size = CFG_SIZE;
1705         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1706         region->bar_size = CFG_BAR_SIZE;
1707         region->bar_id = CFG_BAR_ID;
1708         region->used = 1;
1709
1710         /* SRAM */
1711         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1712         region->region_base = SRAM_BASE_ADDR;
1713         region->region_size = SRAM_SIZE;
1714         region->offset_in_bar = 0;
1715         region->bar_size = SRAM_BAR_SIZE;
1716         region->bar_id = SRAM_BAR_ID;
1717         region->used = 1;
1718
1719         /* DRAM */
1720         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1721         region->region_base = DRAM_PHYS_BASE;
1722         region->region_size = hdev->asic_prop.dram_size;
1723         region->offset_in_bar = 0;
1724         region->bar_size = prop->dram_pci_bar_size;
1725         region->bar_id = HBM_BAR_ID;
1726         region->used = 1;
1727
1728         /* SP SRAM */
1729         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1730         region->region_base = PSOC_SCRATCHPAD_ADDR;
1731         region->region_size = PSOC_SCRATCHPAD_SIZE;
1732         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1733         region->bar_size = CFG_BAR_SIZE;
1734         region->bar_id = CFG_BAR_ID;
1735         region->used = 1;
1736 }
1737
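/*
 * SW initialization: allocate the gaudi device structure, build the event
 * map, create the DMA pools, allocate the CPU accessible memory and the
 * internal QMAN PQs, and declare the supported driver features.
 */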
1738 static int gaudi_sw_init(struct hl_device *hdev)
1739 {
1740         struct gaudi_device *gaudi;
1741         u32 i, event_id = 0;
1742         int rc;
1743
1744         /* Allocate device structure */
1745         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1746         if (!gaudi)
1747                 return -ENOMEM;
1748
1749         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1750                 if (gaudi_irq_map_table[i].valid) {
1751                         if (event_id == GAUDI_EVENT_SIZE) {
1752                                 dev_err(hdev->dev,
1753                                         "Event array exceeds the limit of %u events\n",
1754                                         GAUDI_EVENT_SIZE);
1755                                 rc = -EINVAL;
1756                                 goto free_gaudi_device;
1757                         }
1758
1759                         gaudi->events[event_id++] =
1760                                         gaudi_irq_map_table[i].fc_id;
1761                 }
1762         }
1763
1764         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1765
1766         gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1767
1768         hdev->asic_specific = gaudi;
1769
1770         /* Create DMA pool for small allocations */
1771         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1772                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1773         if (!hdev->dma_pool) {
1774                 dev_err(hdev->dev, "failed to create DMA pool\n");
1775                 rc = -ENOMEM;
1776                 goto free_gaudi_device;
1777         }
1778
1779         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1780         if (rc)
1781                 goto free_dma_pool;
1782
1783         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1784         if (!hdev->cpu_accessible_dma_pool) {
1785                 dev_err(hdev->dev,
1786                         "Failed to create CPU accessible DMA pool\n");
1787                 rc = -ENOMEM;
1788                 goto free_cpu_dma_mem;
1789         }
1790
1791         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1792                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1793                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1794         if (rc) {
1795                 dev_err(hdev->dev,
1796                         "Failed to add memory to CPU accessible DMA pool\n");
1797                 rc = -EFAULT;
1798                 goto free_cpu_accessible_dma_pool;
1799         }
1800
1801         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1802         if (rc)
1803                 goto free_cpu_accessible_dma_pool;
1804
1805         spin_lock_init(&gaudi->hw_queues_lock);
1806         mutex_init(&gaudi->clk_gate_mutex);
1807
1808         hdev->supports_sync_stream = true;
1809         hdev->supports_coresight = true;
1810         hdev->supports_staged_submission = true;
1811
1812         gaudi_set_pci_memory_regions(hdev);
1813
1814         return 0;
1815
1816 free_cpu_accessible_dma_pool:
1817         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1818 free_cpu_dma_mem:
1819         if (!hdev->asic_prop.fw_security_enabled)
1820                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1821                                         hdev->cpu_pci_msb_addr);
1822         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1823                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1824                         hdev->cpu_accessible_dma_mem,
1825                         hdev->cpu_accessible_dma_address);
1826 free_dma_pool:
1827         dma_pool_destroy(hdev->dma_pool);
1828 free_gaudi_device:
1829         kfree(gaudi);
1830         return rc;
1831 }
1832
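/* Release everything that was allocated in gaudi_sw_init() */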
1833 static int gaudi_sw_fini(struct hl_device *hdev)
1834 {
1835         struct gaudi_device *gaudi = hdev->asic_specific;
1836
1837         gaudi_free_internal_qmans_pq_mem(hdev);
1838
1839         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1840
1841         if (!hdev->asic_prop.fw_security_enabled)
1842                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1843                                         hdev->cpu_pci_msb_addr);
1844
1845         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1846                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1847                         hdev->cpu_accessible_dma_mem,
1848                         hdev->cpu_accessible_dma_address);
1849
1850         dma_pool_destroy(hdev->dma_pool);
1851
1852         mutex_destroy(&gaudi->clk_gate_mutex);
1853
1854         kfree(gaudi);
1855
1856         return 0;
1857 }
1858
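/*
 * Single MSI mode interrupt handler: one interrupt serves all completion
 * queues and the event queue, so invoke all of their handlers.
 */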
1859 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1860 {
1861         struct hl_device *hdev = arg;
1862         int i;
1863
1864         if (hdev->disabled)
1865                 return IRQ_HANDLED;
1866
1867         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1868                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1869
1870         hl_irq_handler_eq(irq, &hdev->event_queue);
1871
1872         return IRQ_HANDLED;
1873 }
1874
1875 /*
1876  * For backward compatibility, new MSI interrupts should be set after the
1877  * existing CPU and NIC interrupts.
1878  */
1879 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1880                                 bool cpu_eq)
1881 {
1882         int msi_vec;
1883
1884         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1885                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1886                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1887
1888         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1889                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1890
1891         return pci_irq_vector(hdev->pdev, msi_vec);
1892 }
1893
1894 static int gaudi_enable_msi_single(struct hl_device *hdev)
1895 {
1896         int rc, irq;
1897
1898         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1899
1900         irq = gaudi_pci_irq_vector(hdev, 0, false);
1901         rc = request_irq(irq, gaudi_irq_handler_single, 0,
1902                         "gaudi single msi", hdev);
1903         if (rc)
1904                 dev_err(hdev->dev,
1905                         "Failed to request single MSI IRQ\n");
1906
1907         return rc;
1908 }
1909
1910 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1911 {
1912         int cq_cnt = hdev->asic_prop.completion_queues_count;
1913         int rc, i, irq_cnt_init, irq;
1914
1915         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1916                 irq = gaudi_pci_irq_vector(hdev, i, false);
1917                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1918                                 &hdev->completion_queue[i]);
1919                 if (rc) {
1920                         dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1921                         goto free_irqs;
1922                 }
1923         }
1924
1925         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1926         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1927                                 &hdev->event_queue);
1928         if (rc) {
1929                 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1930                 goto free_irqs;
1931         }
1932
1933         return 0;
1934
1935 free_irqs:
1936         for (i = 0 ; i < irq_cnt_init ; i++)
1937                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1938                                 &hdev->completion_queue[i]);
1939         return rc;
1940 }
1941
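/*
 * Enable MSI interrupts: use multi MSI mode if enough vectors were
 * allocated, otherwise fall back to a single MSI vector.
 */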
1942 static int gaudi_enable_msi(struct hl_device *hdev)
1943 {
1944         struct gaudi_device *gaudi = hdev->asic_specific;
1945         int rc;
1946
1947         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1948                 return 0;
1949
1950         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
1951         if (rc < 0) {
1952                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1953                 return rc;
1954         }
1955
1956         if (rc < NUMBER_OF_INTERRUPTS) {
1957                 gaudi->multi_msi_mode = false;
1958                 rc = gaudi_enable_msi_single(hdev);
1959         } else {
1960                 gaudi->multi_msi_mode = true;
1961                 rc = gaudi_enable_msi_multi(hdev);
1962         }
1963
1964         if (rc)
1965                 goto free_pci_irq_vectors;
1966
1967         gaudi->hw_cap_initialized |= HW_CAP_MSI;
1968
1969         return 0;
1970
1971 free_pci_irq_vectors:
1972         pci_free_irq_vectors(hdev->pdev);
1973         return rc;
1974 }
1975
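/* Wait for all in-flight IRQ handlers to complete */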
1976 static void gaudi_sync_irqs(struct hl_device *hdev)
1977 {
1978         struct gaudi_device *gaudi = hdev->asic_specific;
1979         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1980
1981         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1982                 return;
1983
1984         /* Wait for all pending IRQ handlers to finish */
1985         if (gaudi->multi_msi_mode) {
1986                 for (i = 0 ; i < cq_cnt ; i++)
1987                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1988
1989                 synchronize_irq(gaudi_pci_irq_vector(hdev,
1990                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
1991                                                 true));
1992         } else {
1993                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1994         }
1995 }
1996
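/* Free the requested IRQs and release the MSI vectors */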
1997 static void gaudi_disable_msi(struct hl_device *hdev)
1998 {
1999         struct gaudi_device *gaudi = hdev->asic_specific;
2000         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2001
2002         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2003                 return;
2004
2005         gaudi_sync_irqs(hdev);
2006
2007         if (gaudi->multi_msi_mode) {
2008                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2009                                                 true);
2010                 free_irq(irq, &hdev->event_queue);
2011
2012                 for (i = 0 ; i < cq_cnt ; i++) {
2013                         irq = gaudi_pci_irq_vector(hdev, i, false);
2014                         free_irq(irq, &hdev->completion_queue[i]);
2015                 }
2016         } else {
2017                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2018         }
2019
2020         pci_free_irq_vectors(hdev->pdev);
2021
2022         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2023 }
2024
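/*
 * Enable the SRAM scrambler in all routers and DMA interfaces, unless the
 * firmware is secured, the firmware handles SRAM scrambling (per the boot
 * device status), or SRAM scrambling is disabled for this device.
 */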
2025 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2026 {
2027         struct gaudi_device *gaudi = hdev->asic_specific;
2028
2029         if (hdev->asic_prop.fw_security_enabled)
2030                 return;
2031
2032         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2033                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2034                 return;
2035
2036         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2037                 return;
2038
2039         if (!hdev->sram_scrambler_enable)
2040                 return;
2041
2042         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2043                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2044         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2045                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2046         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2047                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2048         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2049                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2050         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2051                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2052         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2053                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2054         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2055                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2056         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2057                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2058
2059         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2060                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2061         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2062                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2063         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2064                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2065         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2066                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2067         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2068                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2069         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2070                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2071         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2072                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2073         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2074                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2075
2076         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2077                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2078         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2079                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2080         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2081                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2082         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2083                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2084         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2085                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2086         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2087                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2088         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2089                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2090         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2091                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2092
2093         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2094 }
2095
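/* Same as gaudi_init_scrambler_sram(), but for the HBM scrambler */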
2096 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2097 {
2098         struct gaudi_device *gaudi = hdev->asic_specific;
2099
2100         if (hdev->asic_prop.fw_security_enabled)
2101                 return;
2102
2103         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2104                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2105                 return;
2106
2107         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2108                 return;
2109
2110         if (!hdev->dram_scrambler_enable)
2111                 return;
2112
2113         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2114                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2115         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2116                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2117         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2118                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2119         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2120                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2121         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2122                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2123         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2124                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2125         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2126                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2127         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2128                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2129
2130         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2131                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2132         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2133                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2134         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2135                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2136         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2137                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2138         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2139                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2140         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2141                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2142         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2143                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2144         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2145                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2146
2147         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2148                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2149         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2150                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2151         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2152                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2153         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2154                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2155         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2156                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2157         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2158                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2159         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2160                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2161         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2162                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2163
2164         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2165 }
2166
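/*
 * Configure the end-to-end (E2E) credits of the routers and DMA interfaces,
 * unless the firmware is secured or handles the E2E credits configuration.
 */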
2167 static void gaudi_init_e2e(struct hl_device *hdev)
2168 {
2169         if (hdev->asic_prop.fw_security_enabled)
2170                 return;
2171
2172         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2173                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2174                 return;
2175
2176         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2177         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2178         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2179         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2180
2181         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2182         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2183         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2184         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2185
2186         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2187         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2188         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2189         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2190
2191         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2192         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2193         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2194         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2195
2196         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2197         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2198         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2199         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2200
2201         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2202         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2203         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2204         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2205
2206         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2207         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2208         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2209         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2210
2211         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2212         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2213         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2214         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2215
2216         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2217         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2218         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2219         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2220
2221         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2222         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2223         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2224         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2225
2226         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2227         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2228         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2229         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2230
2231         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2232         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2233         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2234         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2235
2236         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2237         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2238         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2239         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2240
2241         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2242         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2243         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2244         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2245
2246         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2247         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2248         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2249         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2250
2251         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2252         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2253         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2254         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2255
2256         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2257         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2258         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2259         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2260
2261         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2262         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2263         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2264         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2265
2266         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2267         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2268         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2269         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2270
2271         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2272         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2273         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2274         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2275
2276         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2277         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2278         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2279         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2280
2281         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2282         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2283         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2284         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2285
2286         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2287         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2288         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2289         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2290
2291         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2292         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2293         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2294         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2295
2296         if (!hdev->dram_scrambler_enable) {
2297                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2298                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2299                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2300                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2301
2302                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2303                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2304                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2305                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2306
2307                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2308                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2309                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2310                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2311
2312                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2313                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2314                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2315                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2316
2317                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2318                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2319                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2320                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2321
2322                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2323                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2324                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2325                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2326
2327                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2328                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2329                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2330                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2331
2332                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2333                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2334                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2335                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2336
2337                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2338                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2339                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2340                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2341
2342                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2343                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2344                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2345                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2346
2347                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2348                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2349                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2350                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2351
2352                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2353                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2354                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2355                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2356
2357                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2358                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2359                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2360                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2361
2362                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2363                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2364                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2365                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2366
2367                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2368                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2369                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2370                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2371
2372                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2373                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2374                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2375                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2376
2377                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2378                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2379                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2380                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2381
2382                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2383                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2384                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2385                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2386
2387                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2388                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2389                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2390                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2391
2392                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2393                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2394                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2395                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2396
2397                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2398                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2399                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2400                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2401
2402                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2403                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2404                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2405                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2406
2407                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2408                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2409                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2410                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2411
2412                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2413                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2414                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2415                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2416         }
2417
2418         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2419                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2420         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2421                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2422
2423         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2424                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2425         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2426                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2427
2428         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2429                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2430         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2431                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2432
2433         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2434                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2435         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2436                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2437
2438         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2439                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2440         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2441                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2442
2443         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2444                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2445         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2446                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2447
2448         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2449                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2450         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2451                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2452
2453         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2454                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2455         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2456                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2457
2458         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2459                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2460         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2461                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2462
2463         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2464                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2465         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2466                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2467
2468         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2469                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2470         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2471                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2472
2473         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2474                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2475         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2476                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2477
2478         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2479                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2480         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2481                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2482
2483         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2484                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2485         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2486                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2487
2488         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2489                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2490         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2491                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2492
2493         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2494                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2495         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2496                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2497
2498         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2499                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2500         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2501                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2502
2503         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2504                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2505         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2506                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2507
2508         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2509                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2510         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2511                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2512
2513         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2514                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2515         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2516                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2517
2518         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2519                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2520         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2521                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2522
2523         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2524                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2525         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2526                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2527
2528         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2529                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2530         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2531                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2532
2533         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2534                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2535         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2536                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2537 }
2538
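/*
 * Configure the HBM read/write credits of the DMA interfaces, unless the
 * firmware is secured or handles the HBM credits configuration.
 */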
2539 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2540 {
2541         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2542
2543         if (hdev->asic_prop.fw_security_enabled)
2544                 return;
2545
2546         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2547                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2548                 return;
2549
2550         hbm0_wr = 0x33333333;
2551         hbm0_rd = 0x77777777;
2552         hbm1_wr = 0x55555555;
2553         hbm1_rd = 0xDDDDDDDD;
2554
2555         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2556         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2557         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2558         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2559
2560         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2561         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2562         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2563         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2564
2565         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2566         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2567         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2568         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2569
2570         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2571         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2572         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2573         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2574
2575         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2576                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2577                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2578         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2579                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2580                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2581         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2582                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2583                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2584         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2585                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2586                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2587
2588         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2589                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2590                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2591         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2592                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2593                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2594         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2595                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2596                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2597         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2598                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2599                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2600 }
2601
2602 static void gaudi_init_golden_registers(struct hl_device *hdev)
2603 {
2604         u32 tpc_offset;
2605         int tpc_id, i;
2606
2607         gaudi_init_e2e(hdev);
2608         gaudi_init_hbm_cred(hdev);
2609
2610         for (tpc_id = 0, tpc_offset = 0;
2611                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2612                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2613                 /* Mask all arithmetic interrupts from TPC */
2614                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2615                 /* Set 16 cache lines */
2616                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2617                                 ICACHE_FETCH_LINE_NUM, 2);
2618         }
2619
2620         /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2621         for (i = 0 ; i < 128 ; i += 8)
2622                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2623
2624         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2625         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2626         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2627         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2628 }
2629
2630 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2631                                         int qman_id, dma_addr_t qman_pq_addr)
2632 {
2633         struct cpu_dyn_regs *dyn_regs =
2634                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2635         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2636         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2637         u32 q_off, dma_qm_offset;
2638         u32 dma_qm_err_cfg, irq_handler_offset;
2639
2640         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2641
2642         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2643                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2644         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2645                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2646         so_base_en_lo = lower_32_bits(CFG_BASE +
2647                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2648         so_base_en_hi = upper_32_bits(CFG_BASE +
2649                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2650         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2651                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2652         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2653                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2654         so_base_ws_lo = lower_32_bits(CFG_BASE +
2655                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2656         so_base_ws_hi = upper_32_bits(CFG_BASE +
2657                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2658
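             /* Per-stream registers (the _0.._3 variants) are spaced 4 bytes
              * apart, hence the qman_id * 4 term in the per-stream offset
              */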
2659         q_off = dma_qm_offset + qman_id * 4;
2660
2661         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2662         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2663
2664         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2665         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2666         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2667
2668         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2669         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2670                                                         QMAN_LDMA_SRC_OFFSET);
2671         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2672                                                         QMAN_LDMA_DST_OFFSET);
2673
2674         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2675         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2676         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2677         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2678         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2679         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2680         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2681         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2682
2683         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2684
2685         /* The following configuration is needed only once per QMAN */
2686         if (qman_id == 0) {
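                     /* Route the QMAN error message either directly to the GIC
                      * distributor or to a register supplied by the firmware in
                      * the dynamic regs descriptor
                      */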
2687                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2688                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2689                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2690
2691                 /* Configure RAZWI IRQ */
2692                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2693                 if (hdev->stop_on_err)
2694                         dma_qm_err_cfg |=
2695                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2696
2697                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2698
2699                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2700                         lower_32_bits(CFG_BASE + irq_handler_offset));
2701                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2702                         upper_32_bits(CFG_BASE + irq_handler_offset));
2703
2704                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2705                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2706                                                                         dma_id);
2707
2708                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2709                                 QM_ARB_ERR_MSG_EN_MASK);
2710
2711                 /* Increase ARB WDT to support streams architecture */
2712                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2713                                 GAUDI_ARB_WDT_TIMEOUT);
2714
2715                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2716                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2717
2718                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2719         }
2720 }
2721
2722 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2723 {
2724         struct cpu_dyn_regs *dyn_regs =
2725                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2726         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2727         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2728         u32 irq_handler_offset;
2729
2730         /* Set to maximum possible according to physical size */
2731         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2732         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2733
2734         /* WA for H/W bug H3-2116 */
2735         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2736
2737         /* STOP_ON bit: no completion is sent for the operation on RAZWI */
2738         if (hdev->stop_on_err)
2739                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2740
2741         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2742
2743         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2744                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2745                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2746
2747         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2748                 lower_32_bits(CFG_BASE + irq_handler_offset));
2749         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2750                 upper_32_bits(CFG_BASE + irq_handler_offset));
2751
2752         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2753                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2754         WREG32(mmDMA0_CORE_PROT + dma_offset,
2755                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2756         /* If the channel is secured, it should be in MMU bypass mode */
2757         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2758                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2759         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2760 }
2761
2762 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2763                                 u32 enable_mask)
2764 {
2765         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2766
2767         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2768 }
2769
2770 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2771 {
2772         struct gaudi_device *gaudi = hdev->asic_specific;
2773         struct hl_hw_queue *q;
2774         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2775
2776         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2777                 return;
2778
2779         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2780                 dma_id = gaudi_dma_assignment[i];
2781                 /*
2782                  * For queues after the CPU Q, we need to add 1 to get the
2783                  * correct queue index. In addition, we need to account for
2784                  * the CPU EQ and the NIC IRQs to get the correct MSI vector.
2785                  */
2786                 if (dma_id > 1) {
2787                         cpu_skip = 1;
2788                         nic_skip = NIC_NUMBER_OF_ENGINES;
2789                 } else {
2790                         cpu_skip = 0;
2791                         nic_skip = 0;
2792                 }
2793
2794                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2795                         q_idx = 4 * dma_id + j + cpu_skip;
2796                         q = &hdev->kernel_queues[q_idx];
2797                         q->cq_id = cq_id++;
2798                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2799                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2800                                                 q->bus_address);
2801                 }
2802
2803                 gaudi_init_dma_core(hdev, dma_id);
2804
2805                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2806         }
2807
2808         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2809 }
2810
2811 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2812                                         int qman_id, u64 qman_base_addr)
2813 {
2814         struct cpu_dyn_regs *dyn_regs =
2815                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2816         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2817         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2818         u32 dma_qm_err_cfg, irq_handler_offset;
2819         u32 q_off, dma_qm_offset;
2820
2821         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2822
2823         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2824                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2825         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2826                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2827         so_base_en_lo = lower_32_bits(CFG_BASE +
2828                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2829         so_base_en_hi = upper_32_bits(CFG_BASE +
2830                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2831         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2832                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2833         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2834                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2835         so_base_ws_lo = lower_32_bits(CFG_BASE +
2836                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2837         so_base_ws_hi = upper_32_bits(CFG_BASE +
2838                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2839
2840         q_off = dma_qm_offset + qman_id * 4;
2841
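             /* Streams 0-3 are the upper CPs, each with its own PQ; qman_id 4
              * is the lower CP, which has no PQ and gets the error and
              * arbitration configuration below
              */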
2842         if (qman_id < 4) {
2843                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2844                                         lower_32_bits(qman_base_addr));
2845                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2846                                         upper_32_bits(qman_base_addr));
2847
2848                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2849                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2850                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2851
2852                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2853                                                         QMAN_CPDMA_SIZE_OFFSET);
2854                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2855                                                         QMAN_CPDMA_SRC_OFFSET);
2856                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2857                                                         QMAN_CPDMA_DST_OFFSET);
2858         } else {
2859                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2860                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2861                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2862
2863                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2864                                                         QMAN_LDMA_SIZE_OFFSET);
2865                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2866                                                         QMAN_LDMA_SRC_OFFSET);
2867                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2868                                                         QMAN_LDMA_DST_OFFSET);
2869
2870                 /* Configure RAZWI IRQ */
2871                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2872                 if (hdev->stop_on_err)
2873                         dma_qm_err_cfg |=
2874                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2875
2876                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2877
2878                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2879                         lower_32_bits(CFG_BASE + irq_handler_offset));
2880                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2881                         upper_32_bits(CFG_BASE + irq_handler_offset));
2882
2883                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2884                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2885                                                                         dma_id);
2886
2887                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2888                                 QM_ARB_ERR_MSG_EN_MASK);
2889
2890                 /* Increase ARB WDT to support streams architecture */
2891                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2892                                 GAUDI_ARB_WDT_TIMEOUT);
2893
2894                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2895                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2896                                 QMAN_INTERNAL_MAKE_TRUSTED);
2897         }
2898
2899         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2900         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2901         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2902         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2903
2904         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2905         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2906                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2907                                 mtr_base_ws_lo);
2908                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2909                                 mtr_base_ws_hi);
2910                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2911                                 so_base_ws_lo);
2912                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2913                                 so_base_ws_hi);
2914         }
2915 }
2916
2917 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2918 {
2919         struct gaudi_device *gaudi = hdev->asic_specific;
2920         struct gaudi_internal_qman_info *q;
2921         u64 qman_base_addr;
2922         int i, j, dma_id, internal_q_index;
2923
2924         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2925                 return;
2926
2927         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2928                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2929
2930                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2931                          /*
2932                           * Add the CPU queue in order to get the correct queue
2933                           * number, as all internal queues are placed after it
2934                           */
2935                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2936
2937                         q = &gaudi->internal_qmans[internal_q_index];
2938                         qman_base_addr = (u64) q->pq_dma_addr;
2939                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2940                                                 qman_base_addr);
2941                 }
2942
2943                 /* Initializing lower CP for HBM DMA QMAN */
2944                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2945
2946                 gaudi_init_dma_core(hdev, dma_id);
2947
2948                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2949         }
2950
2951         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2952 }
2953
2954 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2955                                         int qman_id, u64 qman_base_addr)
2956 {
2957         struct cpu_dyn_regs *dyn_regs =
2958                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2959         u32 mtr_base_lo, mtr_base_hi;
2960         u32 so_base_lo, so_base_hi;
2961         u32 irq_handler_offset;
2962         u32 q_off, mme_id;
2963         u32 mme_qm_err_cfg;
2964
2965         mtr_base_lo = lower_32_bits(CFG_BASE +
2966                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2967         mtr_base_hi = upper_32_bits(CFG_BASE +
2968                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2969         so_base_lo = lower_32_bits(CFG_BASE +
2970                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2971         so_base_hi = upper_32_bits(CFG_BASE +
2972                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2973
2974         q_off = mme_offset + qman_id * 4;
2975
2976         if (qman_id < 4) {
2977                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2978                                         lower_32_bits(qman_base_addr));
2979                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2980                                         upper_32_bits(qman_base_addr));
2981
2982                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2983                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2984                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2985
2986                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2987                                                         QMAN_CPDMA_SIZE_OFFSET);
2988                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2989                                                         QMAN_CPDMA_SRC_OFFSET);
2990                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2991                                                         QMAN_CPDMA_DST_OFFSET);
2992         } else {
2993                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2994                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2995                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2996
2997                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2998                                                         QMAN_LDMA_SIZE_OFFSET);
2999                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3000                                                         QMAN_LDMA_SRC_OFFSET);
3001                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3002                                                         QMAN_LDMA_DST_OFFSET);
3003
3004                 /* Configure RAZWI IRQ */
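                     /* mme_offset is either 0 (MME0) or the MME2 block offset;
                      * the division below maps it to an MME index of 0 or 1,
                      * which is added to the error event id
                      */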
3005                 mme_id = mme_offset /
3006                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
3007
3008                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3009                 if (hdev->stop_on_err)
3010                         mme_qm_err_cfg |=
3011                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3012
3013                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3014
3015                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3016                         lower_32_bits(CFG_BASE + irq_handler_offset));
3017                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3018                         upper_32_bits(CFG_BASE + irq_handler_offset));
3019
3020                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3021                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3022                                                                         mme_id);
3023
3024                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3025                                 QM_ARB_ERR_MSG_EN_MASK);
3026
3027                 /* Increase ARB WDT to support streams architecture */
3028                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3029                                 GAUDI_ARB_WDT_TIMEOUT);
3030
3031                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3032                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3033                                 QMAN_INTERNAL_MAKE_TRUSTED);
3034         }
3035
3036         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3037         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3038         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3039         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3040 }
3041
3042 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3043 {
3044         struct gaudi_device *gaudi = hdev->asic_specific;
3045         struct gaudi_internal_qman_info *q;
3046         u64 qman_base_addr;
3047         u32 mme_offset;
3048         int i, internal_q_index;
3049
3050         if (gaudi->hw_cap_initialized & HW_CAP_MME)
3051                 return;
3052
3053         /*
3054          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3055          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3056          */
3057
3058         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3059
3060         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3061                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3062                 q = &gaudi->internal_qmans[internal_q_index];
3063                 qman_base_addr = (u64) q->pq_dma_addr;
3064                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3065                                         qman_base_addr);
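                     /* After the four streams of the first (MME2) QMAN are set,
                      * switch to the MME0 QMAN block at offset 0
                      */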
3066                 if (i == 3)
3067                         mme_offset = 0;
3068         }
3069
3070         /* Initializing lower CP for MME QMANs */
3071         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3072         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3073         gaudi_init_mme_qman(hdev, 0, 4, 0);
3074
3075         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3076         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3077
3078         gaudi->hw_cap_initialized |= HW_CAP_MME;
3079 }
3080
3081 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3082                                 int qman_id, u64 qman_base_addr)
3083 {
3084         struct cpu_dyn_regs *dyn_regs =
3085                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3086         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3087         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3088         u32 tpc_qm_err_cfg, irq_handler_offset;
3089         u32 q_off, tpc_id;
3090
3091         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3092                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3093         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3094                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3095         so_base_en_lo = lower_32_bits(CFG_BASE +
3096                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3097         so_base_en_hi = upper_32_bits(CFG_BASE +
3098                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3099         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3100                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3101         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3102                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3103         so_base_ws_lo = lower_32_bits(CFG_BASE +
3104                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3105         so_base_ws_hi = upper_32_bits(CFG_BASE +
3106                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3107
3108         q_off = tpc_offset + qman_id * 4;
3109
3110         tpc_id = tpc_offset /
3111                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3112
3113         if (qman_id < 4) {
3114                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3115                                         lower_32_bits(qman_base_addr));
3116                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3117                                         upper_32_bits(qman_base_addr));
3118
3119                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3120                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3121                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3122
3123                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3124                                                         QMAN_CPDMA_SIZE_OFFSET);
3125                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3126                                                         QMAN_CPDMA_SRC_OFFSET);
3127                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3128                                                         QMAN_CPDMA_DST_OFFSET);
3129         } else {
3130                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3131                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3132                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3133
3134                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3135                                                         QMAN_LDMA_SIZE_OFFSET);
3136                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3137                                                         QMAN_LDMA_SRC_OFFSET);
3138                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3139                                                         QMAN_LDMA_DST_OFFSET);
3140
3141                 /* Configure RAZWI IRQ */
3142                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3143                 if (hdev->stop_on_err)
3144                         tpc_qm_err_cfg |=
3145                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3146
3147                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3148
3149                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3150                         lower_32_bits(CFG_BASE + irq_handler_offset));
3151                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3152                         upper_32_bits(CFG_BASE + irq_handler_offset));
3153
3154                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3155                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3156                                                                         tpc_id);
3157
3158                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3159                                 QM_ARB_ERR_MSG_EN_MASK);
3160
3161                 /* Increase ARB WDT to support streams architecture */
3162                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3163                                 GAUDI_ARB_WDT_TIMEOUT);
3164
3165                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3166                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3167                                 QMAN_INTERNAL_MAKE_TRUSTED);
3168         }
3169
3170         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3171         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3172         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3173         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3174
3175         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3176         if (tpc_id == 6) {
3177                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3178                                 mtr_base_ws_lo);
3179                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3180                                 mtr_base_ws_hi);
3181                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3182                                 so_base_ws_lo);
3183                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3184                                 so_base_ws_hi);
3185         }
3186 }
3187
3188 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3189 {
3190         struct gaudi_device *gaudi = hdev->asic_specific;
3191         struct gaudi_internal_qman_info *q;
3192         u64 qman_base_addr;
3193         u32 so_base_hi, tpc_offset = 0;
3194         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3195                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3196         int i, tpc_id, internal_q_index;
3197
3198         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3199                 return;
3200
3201         so_base_hi = upper_32_bits(CFG_BASE +
3202                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3203
3204         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3205                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3206                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3207                                                 tpc_id * QMAN_STREAMS + i;
3208                         q = &gaudi->internal_qmans[internal_q_index];
3209                         qman_base_addr = (u64) q->pq_dma_addr;
3210                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3211                                                 qman_base_addr);
3212
3213                         if (i == 3) {
3214                                 /* Initializing lower CP for TPC QMAN */
3215                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3216
3217                                 /* Enable the QMAN and TPC channel */
3218                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3219                                                 QMAN_TPC_ENABLE);
3220                         }
3221                 }
3222
3223                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3224                                 so_base_hi);
3225
3226                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3227
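                     /* Each TPC has its own bit inside HW_CAP_TPC_MASK, so mark
                      * only this engine as initialized
                      */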
3228                 gaudi->hw_cap_initialized |=
3229                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3230         }
3231 }
3232
3233 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3234                                 int qman_id, u64 qman_base_addr, int nic_id)
3235 {
3236         struct cpu_dyn_regs *dyn_regs =
3237                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3238         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3239         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3240         u32 nic_qm_err_cfg, irq_handler_offset;
3241         u32 q_off;
3242
3243         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3244                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3245         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3246                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3247         so_base_en_lo = lower_32_bits(CFG_BASE +
3248                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3249         so_base_en_hi = upper_32_bits(CFG_BASE +
3250                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3251         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3252                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3253         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3254                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3255         so_base_ws_lo = lower_32_bits(CFG_BASE +
3256                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3257         so_base_ws_hi = upper_32_bits(CFG_BASE +
3258                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3259
3260         q_off = nic_offset + qman_id * 4;
3261
3262         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3263         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3264
3265         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3266         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3267         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3268
3269         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3270                                                         QMAN_LDMA_SIZE_OFFSET);
3271         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3272                                                         QMAN_LDMA_SRC_OFFSET);
3273         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3274                                                         QMAN_LDMA_DST_OFFSET);
3275
3276         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3277         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3278         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3279         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3280
3281         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3282         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3283         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3284         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3285         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3286
3287         if (qman_id == 0) {
3288                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3289                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3290                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3291
3292                 /* Configure RAZWI IRQ */
3293                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3294                 if (hdev->stop_on_err)
3295                         nic_qm_err_cfg |=
3296                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3297
3298                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3299
3300                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3301                         lower_32_bits(CFG_BASE + irq_handler_offset));
3302                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3303                         upper_32_bits(CFG_BASE + irq_handler_offset));
3304
3305                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3306                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3307                                                                         nic_id);
3308
3309                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3310                                 QM_ARB_ERR_MSG_EN_MASK);
3311
3312                 /* Increase ARB WDT to support streams architecture */
3313                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3314                                 GAUDI_ARB_WDT_TIMEOUT);
3315
3316                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3317                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3318                                 QMAN_INTERNAL_MAKE_TRUSTED);
3319         }
3320 }
3321
3322 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3323 {
3324         struct gaudi_device *gaudi = hdev->asic_specific;
3325         struct gaudi_internal_qman_info *q;
3326         u64 qman_base_addr;
3327         u32 nic_offset = 0;
3328         u32 nic_delta_between_qmans =
3329                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3330         u32 nic_delta_between_nics =
3331                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3332         int i, nic_id, internal_q_index;
3333
3334         if (!hdev->nic_ports_mask)
3335                 return;
3336
3337         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3338                 return;
3339
3340         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3341
3342         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3343                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3344                         nic_offset += nic_delta_between_qmans;
3345                         if (nic_id & 1) {
3346                                 nic_offset -= (nic_delta_between_qmans * 2);
3347                                 nic_offset += nic_delta_between_nics;
3348                         }
3349                         continue;
3350                 }
3351
3352                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3353                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3354                                                 nic_id * QMAN_STREAMS + i;
3355                         q = &gaudi->internal_qmans[internal_q_index];
3356                         qman_base_addr = (u64) q->pq_dma_addr;
3357                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3358                                                 qman_base_addr, nic_id);
3359                 }
3360
3361                 /* Enable the QMAN */
3362                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3363
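                     /* Each NIC macro hosts two QMANs; advance to the next QMAN
                      * and, after the odd engine, jump to the next NIC macro
                      */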
3364                 nic_offset += nic_delta_between_qmans;
3365                 if (nic_id & 1) {
3366                         nic_offset -= (nic_delta_between_qmans * 2);
3367                         nic_offset += nic_delta_between_nics;
3368                 }
3369
3370                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3371         }
3372 }
3373
3374 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3375 {
3376         struct gaudi_device *gaudi = hdev->asic_specific;
3377
3378         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3379                 return;
3380
3381         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3382         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3383         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3384 }
3385
3386 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3387 {
3388         struct gaudi_device *gaudi = hdev->asic_specific;
3389
3390         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3391                 return;
3392
3393         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3394         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3395         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3396         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3397         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3398 }
3399
3400 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3401 {
3402         struct gaudi_device *gaudi = hdev->asic_specific;
3403
3404         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3405                 return;
3406
3407         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3408         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3409 }
3410
3411 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3412 {
3413         struct gaudi_device *gaudi = hdev->asic_specific;
3414         u32 tpc_offset = 0;
3415         int tpc_id;
3416
3417         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3418                 return;
3419
3420         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3421                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3422                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3423         }
3424 }
3425
3426 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3427 {
3428         struct gaudi_device *gaudi = hdev->asic_specific;
3429         u32 nic_mask, nic_offset = 0;
3430         u32 nic_delta_between_qmans =
3431                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3432         u32 nic_delta_between_nics =
3433                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3434         int nic_id;
3435
3436         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3437                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3438
3439                 if (gaudi->hw_cap_initialized & nic_mask)
3440                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3441
3442                 nic_offset += nic_delta_between_qmans;
3443                 if (nic_id & 1) {
3444                         nic_offset -= (nic_delta_between_qmans * 2);
3445                         nic_offset += nic_delta_between_nics;
3446                 }
3447         }
3448 }
3449
3450 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3451 {
3452         struct gaudi_device *gaudi = hdev->asic_specific;
3453
3454         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3455                 return;
3456
3457         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3458         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3459         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3460         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3461 }
3462
3463 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3464 {
3465         struct gaudi_device *gaudi = hdev->asic_specific;
3466
3467         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3468                 return;
3469
3470         /* Stop CPs of HBM DMA QMANs */
3471
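             /* 0x1F stops all five CPs of each QMAN: the four upper CPs and
              * the lower CP
              */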
3472         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3473         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3474         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3475         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3476         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3477 }
3478
3479 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3480 {
3481         struct gaudi_device *gaudi = hdev->asic_specific;
3482
3483         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3484                 return;
3485
3486         /* Stop CPs of MME QMANs */
3487         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3488         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3489 }
3490
3491 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3492 {
3493         struct gaudi_device *gaudi = hdev->asic_specific;
3494
3495         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3496                 return;
3497
3498         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3499         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3500         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3501         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3502         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3503         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3504         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3505         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3506 }
3507
3508 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3509 {
3510         struct gaudi_device *gaudi = hdev->asic_specific;
3511
3512         /* Stop upper CPs of QMANs */
3513
3514         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3515                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3516                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3517                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3518                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3519
3520         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3521                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3522                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3523                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3524                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3525
3526         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3527                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3528                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3529                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3530                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3531
3532         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3533                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3534                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3535                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3536                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3537
3538         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3539                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3540                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3541                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3542                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3543
3544         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3545                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3546                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3547                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3548                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3549
3550         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3551                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3552                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3553                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3554                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3555
3556         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3557                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3558                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3559                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3560                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3561
3562         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3563                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3564                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3565                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3566                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3567
3568         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3569                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3570                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3571                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3572                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3573 }
3574
3575 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3576 {
3577         struct gaudi_device *gaudi = hdev->asic_specific;
3578
3579         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3580                 return;
3581
3582         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3583         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3584         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3585 }
3586
3587 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3588 {
3589         struct gaudi_device *gaudi = hdev->asic_specific;
3590
3591         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3592                 return;
3593
3594         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3595         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3596         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3597         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3598         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3599 }
3600
3601 static void gaudi_mme_stall(struct hl_device *hdev)
3602 {
3603         struct gaudi_device *gaudi = hdev->asic_specific;
3604
3605         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3606                 return;
3607
3608         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3609         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3610         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3611         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3612         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3613         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3614         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3615         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3616         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3617         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3618         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3619         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3620         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3621         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3622         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3623         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3624         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3625 }
3626
3627 static void gaudi_tpc_stall(struct hl_device *hdev)
3628 {
3629         struct gaudi_device *gaudi = hdev->asic_specific;
3630
3631         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3632                 return;
3633
3634         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3635         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3636         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3637         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3638         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3639         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3640         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3641         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3642 }
3643
3644 static void gaudi_set_clock_gating(struct hl_device *hdev)
3645 {
3646         struct gaudi_device *gaudi = hdev->asic_specific;
3647         u32 qman_offset;
3648         bool enable;
3649         int i;
3650
3651         /* If we are in the middle of a debug session, don't enable clock
3652          * gating as it may interfere
3653          */
3654         if (hdev->in_debug)
3655                 return;
3656
3657         if (hdev->asic_prop.fw_security_enabled)
3658                 return;
3659
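             /* clock_gating_mask holds one bit per engine id; enable gating
              * only for engines whose bit is set
              */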
3660         for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3661                 enable = !!(hdev->clock_gating_mask &
3662                                 (BIT_ULL(gaudi_dma_assignment[i])));
3663
3664                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3665                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3666                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3667                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3668                                 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3669         }
3670
3671         for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3672                 enable = !!(hdev->clock_gating_mask &
3673                                 (BIT_ULL(gaudi_dma_assignment[i])));
3674
3675                 /* GC sends work to the DMA engine through the upper CP in
3676                  * DMA5, so we must not enable clock gating in that DMA
3677                  */
3678                 if (i == GAUDI_HBM_DMA_4)
3679                         enable = 0;
3680
3681                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3682                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3683                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3684                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3685                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3686         }
3687
3688         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3689         WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3690         WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3691
3692         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3693         WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3694         WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3695
3696         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3697                 enable = !!(hdev->clock_gating_mask &
3698                                 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3699
3700                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3701                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3702                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3703                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3704
3705                 qman_offset += TPC_QMAN_OFFSET;
3706         }
3707
3708         gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3709 }
3710
3711 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3712 {
3713         struct gaudi_device *gaudi = hdev->asic_specific;
3714         u32 qman_offset;
3715         int i;
3716
3717         if (hdev->asic_prop.fw_security_enabled)
3718                 return;
3719
3720         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3721                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3722                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3723
3724                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3725         }
3726
3727         WREG32(mmMME0_QM_CGM_CFG, 0);
3728         WREG32(mmMME0_QM_CGM_CFG1, 0);
3729         WREG32(mmMME2_QM_CGM_CFG, 0);
3730         WREG32(mmMME2_QM_CGM_CFG1, 0);
3731
3732         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3733                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3734                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3735
3736                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3737         }
3738
3739         gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3740 }
3741
3742 static void gaudi_enable_timestamp(struct hl_device *hdev)
3743 {
3744         /* Disable the timestamp counter */
3745         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3746
3747         /* Zero the lower/upper parts of the 64-bit counter */
3748         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3749         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3750
3751         /* Enable the counter */
3752         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3753 }
3754
3755 static void gaudi_disable_timestamp(struct hl_device *hdev)
3756 {
3757         /* Disable the timestamp counter */
3758         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3759 }
3760
3761 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3762 {
3763         u32 wait_timeout_ms;
3764
3765         dev_info(hdev->dev,
3766                 "Halting compute engines and disabling interrupts\n");
3767
3768         if (hdev->pldm)
3769                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3770         else
3771                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3772
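        /* Stop the QMANs first (NIC, MME, TPC, then the HBM and PCI DMA
         * channels), then stall the engines themselves, and only afterwards
         * disable the QMANs, the timestamp counter and MSI.
         */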
3773         gaudi_stop_nic_qmans(hdev);
3774         gaudi_stop_mme_qmans(hdev);
3775         gaudi_stop_tpc_qmans(hdev);
3776         gaudi_stop_hbm_dma_qmans(hdev);
3777         gaudi_stop_pci_dma_qmans(hdev);
3778
3779         hdev->asic_funcs->disable_clock_gating(hdev);
3780
3781         msleep(wait_timeout_ms);
3782
3783         gaudi_pci_dma_stall(hdev);
3784         gaudi_hbm_dma_stall(hdev);
3785         gaudi_tpc_stall(hdev);
3786         gaudi_mme_stall(hdev);
3787
3788         msleep(wait_timeout_ms);
3789
3790         gaudi_disable_nic_qmans(hdev);
3791         gaudi_disable_mme_qmans(hdev);
3792         gaudi_disable_tpc_qmans(hdev);
3793         gaudi_disable_hbm_dma_qmans(hdev);
3794         gaudi_disable_pci_dma_qmans(hdev);
3795
3796         gaudi_disable_timestamp(hdev);
3797
3798         gaudi_disable_msi(hdev);
3799 }
3800
3801 static int gaudi_mmu_init(struct hl_device *hdev)
3802 {
3803         struct asic_fixed_properties *prop = &hdev->asic_prop;
3804         struct gaudi_device *gaudi = hdev->asic_specific;
3805         u64 hop0_addr;
3806         int rc, i;
3807
3808         if (!hdev->mmu_enable)
3809                 return 0;
3810
3811         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3812                 return 0;
3813
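        /* Program the hop0 page-table address for every possible ASID. The
         * hop tables are laid out consecutively in the MMU page-table area,
         * one table per ASID.
         */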
3814         for (i = 0 ; i < prop->max_asid ; i++) {
3815                 hop0_addr = prop->mmu_pgt_addr +
3816                                 (i * prop->mmu_hop_table_size);
3817
3818                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3819                 if (rc) {
3820                         dev_err(hdev->dev,
3821                                 "failed to set hop0 addr for asid %d\n", i);
3822                         goto err;
3823                 }
3824         }
3825
3826         /* Init the MMU cache management page */
3827         WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3828         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3829
3830         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3831
3832         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3833         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3834
3835         WREG32(mmSTLB_HOP_CONFIGURATION,
3836                         hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3837
3838         /*
3839          * The H/W expects the first PI after init to be 1. After wraparound
3840          * we'll write 0.
3841          */
3842         gaudi->mmu_cache_inv_pi = 1;
3843
3844         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3845
3846         return 0;
3847
3848 err:
3849         return rc;
3850 }
3851
3852 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3853 {
3854         void __iomem *dst;
3855
3856         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3857
3858         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3859 }
3860
3861 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3862 {
3863         void __iomem *dst;
3864
3865         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3866
3867         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3868 }
3869
3870 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3871 {
3872         struct dynamic_fw_load_mgr *dynamic_loader;
3873         struct cpu_dyn_regs *dyn_regs;
3874
3875         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3876
3877         /*
3878          * Set initial values for a few specific dynamic registers. Before
3879          * the first descriptor is read from the FW, these values have to be
3880          * hard-coded. In later stages of the protocol they are updated
3881          * automatically by reading the FW descriptor, so the data there is
3882          * always up-to-date.
3883          */
3884         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3885         dyn_regs->kmd_msg_to_cpu =
3886                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3887         dyn_regs->cpu_cmd_status_to_host =
3888                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3889
3890         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3891 }
3892
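/*
 * Fill the register map used by the legacy (static) firmware-load protocol.
 * Unlike the dynamic loader, which reads these locations from the FW
 * descriptor, the static loader relies on compile-time register offsets.
 */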
3893 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3894 {
3895         struct static_fw_load_mgr *static_loader;
3896
3897         static_loader = &hdev->fw_loader.static_loader;
3898
3899         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3900         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3901         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3902         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3903         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3904         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3905         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3906         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3907         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3908         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3909         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3910         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3911         static_loader->cpu_reset_wait_msec = hdev->pldm ?
3912                         GAUDI_PLDM_RESET_WAIT_MSEC :
3913                         GAUDI_CPU_RESET_WAIT_MSEC;
3914 }
3915
3916 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3917 {
3918         struct asic_fixed_properties *prop = &hdev->asic_prop;
3919         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3920
3921         /* fill common fields */
3922         fw_loader->linux_loaded = false;
3923         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3924         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3925         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3926         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3927         fw_loader->skip_bmc = !hdev->bmc_enable;
3928         fw_loader->sram_bar_id = SRAM_BAR_ID;
3929         fw_loader->dram_bar_id = HBM_BAR_ID;
3930
3931         if (prop->dynamic_fw_load)
3932                 gaudi_init_dynamic_firmware_loader(hdev);
3933         else
3934                 gaudi_init_static_firmware_loader(hdev);
3935 }
3936
3937 static int gaudi_init_cpu(struct hl_device *hdev)
3938 {
3939         struct gaudi_device *gaudi = hdev->asic_specific;
3940         int rc;
3941
3942         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3943                 return 0;
3944
3945         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3946                 return 0;
3947
3948         /*
3949          * The device CPU works with 40-bit addresses.
3950          * This register extends the addressing to 50 bits.
3951          */
3952         if (!hdev->asic_prop.fw_security_enabled)
3953                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3954
3955         rc = hl_fw_init_cpu(hdev);
3956
3957         if (rc)
3958                 return rc;
3959
3960         gaudi->hw_cap_initialized |= HW_CAP_CPU;
3961
3962         return 0;
3963 }
3964
3965 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3966 {
3967         struct cpu_dyn_regs *dyn_regs =
3968                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3969         struct asic_fixed_properties *prop = &hdev->asic_prop;
3970         struct gaudi_device *gaudi = hdev->asic_specific;
3971         u32 status, irq_handler_offset;
3972         struct hl_eq *eq;
3973         struct hl_hw_queue *cpu_pq =
3974                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3975         int err;
3976
3977         if (!hdev->cpu_queues_enable)
3978                 return 0;
3979
3980         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3981                 return 0;
3982
3983         eq = &hdev->event_queue;
3984
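        /* Tell the device CPU where the PQ, EQ and the CPU-accessible DMA
         * region reside in host memory, and how large each of them is.
         */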
3985         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3986         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3987
3988         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3989         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3990
3991         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3992                         lower_32_bits(hdev->cpu_accessible_dma_address));
3993         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3994                         upper_32_bits(hdev->cpu_accessible_dma_address));
3995
3996         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3997         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3998         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3999
4000         /* Used for EQ CI */
4001         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4002
4003         WREG32(mmCPU_IF_PF_PQ_PI, 0);
4004
4005         if (gaudi->multi_msi_mode)
4006                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4007         else
4008                 WREG32(mmCPU_IF_QUEUE_INIT,
4009                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4010
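        /* Notify the device CPU that the PQ was initialized, either directly
         * through the GIC distributor or through the IRQ register provided
         * by the FW in the dynamic registers descriptor.
         */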
4011         irq_handler_offset = prop->gic_interrupts_enable ?
4012                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4013                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4014
4015         WREG32(irq_handler_offset,
4016                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4017
4018         err = hl_poll_timeout(
4019                 hdev,
4020                 mmCPU_IF_QUEUE_INIT,
4021                 status,
4022                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4023                 1000,
4024                 cpu_timeout);
4025
4026         if (err) {
4027                 dev_err(hdev->dev,
4028                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
4029                 return -EIO;
4030         }
4031
4032         /* update FW application security bits */
4033         if (prop->fw_cpu_boot_dev_sts0_valid)
4034                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4035         if (prop->fw_cpu_boot_dev_sts1_valid)
4036                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4037
4038         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4039         return 0;
4040 }
4041
4042 static void gaudi_pre_hw_init(struct hl_device *hdev)
4043 {
4044         /* Perform a read from the device to make sure the device is up */
4045         RREG32(mmHW_STATE);
4046
4047         if (!hdev->asic_prop.fw_security_enabled) {
4048                 /* Set the access through PCI bars (Linux driver only) as
4049                  * secured
4050                  */
4051                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4052                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4053                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4054
4055                 /* Perform a read to flush the pending writes and ensure the
4056                  * configuration was set in the device
4057                  */
4058                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4059         }
4060
4061         /*
4062          * Let's mark in the H/W that we have reached this point. We check
4063          * this value in the reset_before_init function to understand whether
4064          * we need to reset the chip before doing H/W init. This register is
4065          * cleared by the H/W upon H/W reset
4066          */
4067         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4068 }
4069
4070 static int gaudi_hw_init(struct hl_device *hdev)
4071 {
4072         struct gaudi_device *gaudi = hdev->asic_specific;
4073         int rc;
4074
4075         gaudi_pre_hw_init(hdev);
4076
4077         /* If the iATU is configured by the FW, the HBM BAR ALWAYS points to
4078          * DRAM_PHYS_BASE. So we set it here, and if anyone tries to move it
4079          * later to a different address, there will be an error
4080          */
4081         if (hdev->asic_prop.iatu_done_by_fw)
4082                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4083
4084         /*
4085          * Before pushing u-boot/Linux to the device, the HBM BAR must be set
4086          * to the base address of the DRAM
4087          */
4088         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4089                 dev_err(hdev->dev,
4090                         "failed to map HBM bar to DRAM base address\n");
4091                 return -EIO;
4092         }
4093
4094         rc = gaudi_init_cpu(hdev);
4095         if (rc) {
4096                 dev_err(hdev->dev, "failed to initialize CPU\n");
4097                 return rc;
4098         }
4099
4100         /* In case clock gating was enabled in preboot, we need to disable
4101          * it here before touching the MME/TPC registers.
4102          * There is no need to take the clock gating mutex because no other
4103          * relevant code can run while this function runs
4104          */
4105         hdev->asic_funcs->disable_clock_gating(hdev);
4106
4107         /* SRAM scrambler must be initialized after CPU is running from HBM */
4108         gaudi_init_scrambler_sram(hdev);
4109
4110         /* This is here just in case we are working without CPU */
4111         gaudi_init_scrambler_hbm(hdev);
4112
4113         gaudi_init_golden_registers(hdev);
4114
4115         rc = gaudi_mmu_init(hdev);
4116         if (rc)
4117                 return rc;
4118
4119         gaudi_init_security(hdev);
4120
4121         gaudi_init_pci_dma_qmans(hdev);
4122
4123         gaudi_init_hbm_dma_qmans(hdev);
4124
4125         gaudi_init_mme_qmans(hdev);
4126
4127         gaudi_init_tpc_qmans(hdev);
4128
4129         gaudi_init_nic_qmans(hdev);
4130
4131         hdev->asic_funcs->set_clock_gating(hdev);
4132
4133         gaudi_enable_timestamp(hdev);
4134
4135         /* MSI must be enabled before CPU queues and NIC are initialized */
4136         rc = gaudi_enable_msi(hdev);
4137         if (rc)
4138                 goto disable_queues;
4139
4140         /* must be called after MSI was enabled */
4141         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4142         if (rc) {
4143                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4144                         rc);
4145                 goto disable_msi;
4146         }
4147
4148         /* Perform read from the device to flush all configuration */
4149         RREG32(mmHW_STATE);
4150
4151         return 0;
4152
4153 disable_msi:
4154         gaudi_disable_msi(hdev);
4155 disable_queues:
4156         gaudi_disable_mme_qmans(hdev);
4157         gaudi_disable_pci_dma_qmans(hdev);
4158
4159         return rc;
4160 }
4161
4162 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
4163 {
4164         struct cpu_dyn_regs *dyn_regs =
4165                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4166         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4167         struct gaudi_device *gaudi = hdev->asic_specific;
4168         bool driver_performs_reset;
4169
4170         if (!hard_reset) {
4171                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4172                 return;
4173         }
4174
4175         if (hdev->pldm) {
4176                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4177                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4178         } else {
4179                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4180                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4181         }
4182
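        /* The driver performs the reset itself only when the FW doesn't own
         * the security configuration and doesn't perform the hard reset on
         * the driver's behalf.
         */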
4183         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4184                                         !hdev->asic_prop.hard_reset_done_by_fw);
4185
4186         /* Set the device to handle FLR by H/W as we will put the device CPU
4187          * into halt mode
4188          */
4189         if (driver_performs_reset)
4190                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4191                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4192
4193         /* If Linux is loaded on the device CPU we need to communicate with
4194          * it via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4195          * registers in case of old F/Ws
4196          */
4197         if (hdev->fw_loader.linux_loaded) {
4198                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4199                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4200                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4201
4202                 WREG32(irq_handler_offset,
4203                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4204         } else {
4205                 if (hdev->asic_prop.hard_reset_done_by_fw)
4206                         hl_fw_ask_hard_reset_without_linux(hdev);
4207                 else
4208                         hl_fw_ask_halt_machine_without_linux(hdev);
4209         }
4210
4211         if (driver_performs_reset) {
4212
4213                 /* Configure the reset registers. Must be done as early as
4214                  * possible in case we fail during H/W initialization
4215                  */
4216                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4217                                                 (CFG_RST_H_DMA_MASK |
4218                                                 CFG_RST_H_MME_MASK |
4219                                                 CFG_RST_H_SM_MASK |
4220                                                 CFG_RST_H_TPC_7_MASK));
4221
4222                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4223
4224                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4225                                                 (CFG_RST_H_HBM_MASK |
4226                                                 CFG_RST_H_TPC_7_MASK |
4227                                                 CFG_RST_H_NIC_MASK |
4228                                                 CFG_RST_H_SM_MASK |
4229                                                 CFG_RST_H_DMA_MASK |
4230                                                 CFG_RST_H_MME_MASK |
4231                                                 CFG_RST_H_CPU_MASK |
4232                                                 CFG_RST_H_MMU_MASK));
4233
4234                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4235                                                 (CFG_RST_L_IF_MASK |
4236                                                 CFG_RST_L_PSOC_MASK |
4237                                                 CFG_RST_L_TPC_MASK));
4238
4239                 msleep(cpu_timeout_ms);
4240
4241                 /* Tell ASIC not to re-initialize PCIe */
4242                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4243
4244                 /* Restart BTL/BLR upon hard-reset */
4245                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4246
4247                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4248                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4249
4250                 dev_info(hdev->dev,
4251                         "Issued HARD reset command, going to wait %dms\n",
4252                         reset_timeout_ms);
4253         } else {
4254                 dev_info(hdev->dev,
4255                         "Firmware performs HARD reset, going to wait %dms\n",
4256                         reset_timeout_ms);
4257         }
4258
4259         /*
4260          * After hard reset, we can't poll the BTM_FSM register because the PSOC
4261          * itself is in reset. Need to wait until the reset is deasserted
4262          */
4263         msleep(reset_timeout_ms);
4264
4265         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4266         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4267                 dev_err(hdev->dev,
4268                         "Timeout while waiting for device to reset 0x%x\n",
4269                         status);
4270
4271         if (gaudi) {
4272                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4273                                 HW_CAP_HBM | HW_CAP_PCI_DMA |
4274                                 HW_CAP_MME | HW_CAP_TPC_MASK |
4275                                 HW_CAP_HBM_DMA | HW_CAP_PLL |
4276                                 HW_CAP_NIC_MASK | HW_CAP_MMU |
4277                                 HW_CAP_SRAM_SCRAMBLER |
4278                                 HW_CAP_HBM_SCRAMBLER |
4279                                 HW_CAP_CLK_GATE);
4280
4281                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4282
4283                 hdev->device_cpu_is_halted = false;
4284         }
4285 }
4286
4287 static int gaudi_suspend(struct hl_device *hdev)
4288 {
4289         int rc;
4290
4291         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4292         if (rc)
4293                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4294
4295         return rc;
4296 }
4297
4298 static int gaudi_resume(struct hl_device *hdev)
4299 {
4300         return gaudi_init_iatu(hdev);
4301 }
4302
4303 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4304                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4305 {
4306         int rc;
4307
4308         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4309                         VM_DONTCOPY | VM_NORESERVE;
4310
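        /* The DMA address carries the device's view of host memory
         * (HOST_PHYS_BASE); strip it to get back the CPU DMA address before
         * mapping the buffer to user-space.
         */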
4311         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4312                                 (dma_addr - HOST_PHYS_BASE), size);
4313         if (rc)
4314                 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4315
4316         return rc;
4317 }
4318
4319 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4320 {
4321         struct cpu_dyn_regs *dyn_regs =
4322                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4323         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4324         struct gaudi_device *gaudi = hdev->asic_specific;
4325         bool invalid_queue = false;
4326         int dma_id;
4327
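        /* Translate the driver's H/W queue ID to the matching QMAN PQ_PI
         * doorbell register of the engine that owns the queue.
         */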
4328         switch (hw_queue_id) {
4329         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4330                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4331                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4332                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4333                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4334                 break;
4335
4336         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4337                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4338                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4339                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4340                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4341                 break;
4342
4343         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4344                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4345                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4346                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4347                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4348                 break;
4349
4350         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4351                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4352                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4353                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4354                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4355                 break;
4356
4357         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4358                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4359                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4360                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4361                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4362                 break;
4363
4364         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4365                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4366                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4367                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4368                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4369                 break;
4370
4371         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4372                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4373                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4374                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4375                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4376                 break;
4377
4378         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4379                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4380                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4381                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4382                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4383                 break;
4384
4385         case GAUDI_QUEUE_ID_CPU_PQ:
4386                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4387                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4388                 else
4389                         invalid_queue = true;
4390                 break;
4391
4392         case GAUDI_QUEUE_ID_MME_0_0:
4393                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4394                 break;
4395
4396         case GAUDI_QUEUE_ID_MME_0_1:
4397                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4398                 break;
4399
4400         case GAUDI_QUEUE_ID_MME_0_2:
4401                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4402                 break;
4403
4404         case GAUDI_QUEUE_ID_MME_0_3:
4405                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4406                 break;
4407
4408         case GAUDI_QUEUE_ID_MME_1_0:
4409                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4410                 break;
4411
4412         case GAUDI_QUEUE_ID_MME_1_1:
4413                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4414                 break;
4415
4416         case GAUDI_QUEUE_ID_MME_1_2:
4417                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4418                 break;
4419
4420         case GAUDI_QUEUE_ID_MME_1_3:
4421                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4422                 break;
4423
4424         case GAUDI_QUEUE_ID_TPC_0_0:
4425                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4426                 break;
4427
4428         case GAUDI_QUEUE_ID_TPC_0_1:
4429                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4430                 break;
4431
4432         case GAUDI_QUEUE_ID_TPC_0_2:
4433                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4434                 break;
4435
4436         case GAUDI_QUEUE_ID_TPC_0_3:
4437                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4438                 break;
4439
4440         case GAUDI_QUEUE_ID_TPC_1_0:
4441                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4442                 break;
4443
4444         case GAUDI_QUEUE_ID_TPC_1_1:
4445                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4446                 break;
4447
4448         case GAUDI_QUEUE_ID_TPC_1_2:
4449                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4450                 break;
4451
4452         case GAUDI_QUEUE_ID_TPC_1_3:
4453                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4454                 break;
4455
4456         case GAUDI_QUEUE_ID_TPC_2_0:
4457                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4458                 break;
4459
4460         case GAUDI_QUEUE_ID_TPC_2_1:
4461                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4462                 break;
4463
4464         case GAUDI_QUEUE_ID_TPC_2_2:
4465                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4466                 break;
4467
4468         case GAUDI_QUEUE_ID_TPC_2_3:
4469                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4470                 break;
4471
4472         case GAUDI_QUEUE_ID_TPC_3_0:
4473                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4474                 break;
4475
4476         case GAUDI_QUEUE_ID_TPC_3_1:
4477                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4478                 break;
4479
4480         case GAUDI_QUEUE_ID_TPC_3_2:
4481                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4482                 break;
4483
4484         case GAUDI_QUEUE_ID_TPC_3_3:
4485                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4486                 break;
4487
4488         case GAUDI_QUEUE_ID_TPC_4_0:
4489                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4490                 break;
4491
4492         case GAUDI_QUEUE_ID_TPC_4_1:
4493                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4494                 break;
4495
4496         case GAUDI_QUEUE_ID_TPC_4_2:
4497                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4498                 break;
4499
4500         case GAUDI_QUEUE_ID_TPC_4_3:
4501                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4502                 break;
4503
4504         case GAUDI_QUEUE_ID_TPC_5_0:
4505                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4506                 break;
4507
4508         case GAUDI_QUEUE_ID_TPC_5_1:
4509                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4510                 break;
4511
4512         case GAUDI_QUEUE_ID_TPC_5_2:
4513                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4514                 break;
4515
4516         case GAUDI_QUEUE_ID_TPC_5_3:
4517                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4518                 break;
4519
4520         case GAUDI_QUEUE_ID_TPC_6_0:
4521                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4522                 break;
4523
4524         case GAUDI_QUEUE_ID_TPC_6_1:
4525                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4526                 break;
4527
4528         case GAUDI_QUEUE_ID_TPC_6_2:
4529                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4530                 break;
4531
4532         case GAUDI_QUEUE_ID_TPC_6_3:
4533                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4534                 break;
4535
4536         case GAUDI_QUEUE_ID_TPC_7_0:
4537                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4538                 break;
4539
4540         case GAUDI_QUEUE_ID_TPC_7_1:
4541                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4542                 break;
4543
4544         case GAUDI_QUEUE_ID_TPC_7_2:
4545                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4546                 break;
4547
4548         case GAUDI_QUEUE_ID_TPC_7_3:
4549                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4550                 break;
4551
4552         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4553                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4554                         invalid_queue = true;
4555
4556                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4557                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4558                 break;
4559
4560         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4561                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4562                         invalid_queue = true;
4563
4564                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4565                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4566                 break;
4567
4568         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4569                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4570                         invalid_queue = true;
4571
4572                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4573                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4574                 break;
4575
4576         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4577                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4578                         invalid_queue = true;
4579
4580                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4581                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4582                 break;
4583
4584         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4585                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4586                         invalid_queue = true;
4587
4588                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4589                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4590                 break;
4591
4592         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4593                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4594                         invalid_queue = true;
4595
4596                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4597                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4598                 break;
4599
4600         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4601                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4602                         invalid_queue = true;
4603
4604                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4605                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4606                 break;
4607
4608         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4609                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4610                         invalid_queue = true;
4611
4612                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4613                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4614                 break;
4615
4616         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4617                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4618                         invalid_queue = true;
4619
4620                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4621                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4622                 break;
4623
4624         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4625                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4626                         invalid_queue = true;
4627
4628                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4629                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4630                 break;
4631
4632         default:
4633                 invalid_queue = true;
4634         }
4635
4636         if (invalid_queue) {
4637                 /* Should never get here */
4638                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4639                         hw_queue_id);
4640                 return;
4641         }
4642
4643         db_value = pi;
4644
4645         /* ring the doorbell */
4646         WREG32(db_reg_offset, db_value);
4647
4648         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4649                 /* make sure device CPU will read latest data from host */
4650                 mb();
4651
4652                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4653                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4654                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4655
4656                 WREG32(irq_handler_offset,
4657                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4658         }
4659 }
4660
4661 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4662                                 struct hl_bd *bd)
4663 {
4664         __le64 *pbd = (__le64 *) bd;
4665
4666         /* The QMANs are in host memory, so a simple copy suffices */
4667         pqe[0] = pbd[0];
4668         pqe[1] = pbd[1];
4669 }
4670
4671 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4672                                         dma_addr_t *dma_handle, gfp_t flags)
4673 {
4674         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4675                                                 dma_handle, flags);
4676
4677         /* Shift to the device's base physical address of host memory */
4678         if (kernel_addr)
4679                 *dma_handle += HOST_PHYS_BASE;
4680
4681         return kernel_addr;
4682 }
4683
4684 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4685                 void *cpu_addr, dma_addr_t dma_handle)
4686 {
4687         /* Cancel the device's base physical address of host memory */
4688         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4689
4690         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4691 }
4692
4693 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4694 {
4695         struct asic_fixed_properties *prop = &hdev->asic_prop;
4696         u64  cur_addr = DRAM_BASE_ADDR_USER;
4697         u32 val;
4698         u32 chunk_size;
4699         int rc, dma_id;
4700
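        /* Walk the user DRAM range in chunks of up to 2GB, programming each
         * DMA core in memset mode so that all channels scrub in parallel,
         * then wait for all of them to become idle before the next round.
         */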
4701         while (cur_addr < prop->dram_end_address) {
4702                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4703                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4704
4705                         chunk_size =
4706                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4707
4708                         dev_dbg(hdev->dev,
4709                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4710                                 cur_addr, cur_addr + chunk_size);
4711
4712                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
4713                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
4714                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4715                                                 lower_32_bits(cur_addr));
4716                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4717                                                 upper_32_bits(cur_addr));
4718                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4719                                         chunk_size);
4720                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4721                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4722                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4723
4724                         cur_addr += chunk_size;
4725
4726                         if (cur_addr == prop->dram_end_address)
4727                                 break;
4728                 }
4729
4730                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4731                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4732
4733                         rc = hl_poll_timeout(
4734                                 hdev,
4735                                 mmDMA0_CORE_STS0 + dma_offset,
4736                                 val,
4737                                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4738                                 1000,
4739                                 HBM_SCRUBBING_TIMEOUT_US);
4740
4741                         if (rc) {
4742                                 dev_err(hdev->dev,
4743                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4744                                         dma_id);
4745                                 return -EIO;
4746                         }
4747                 }
4748         }
4749
4750         return 0;
4751 }
4752
4753 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4754 {
4755         struct asic_fixed_properties *prop = &hdev->asic_prop;
4756         struct gaudi_device *gaudi = hdev->asic_specific;
4757         int rc = 0;
4758         u64 val = 0;
4759
4760         if (!hdev->memory_scrub)
4761                 return 0;
4762
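        /* A zero address and size means "scrub everything": wait for the
         * device to become idle, then clear the user SRAM and the entire HBM.
         */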
4763         if (!addr && !size) {
4764                 /* Wait till device is idle */
4765                 rc = hl_poll_timeout(
4766                                 hdev,
4767                                 mmDMA0_CORE_STS0/* dummy */,
4768                                 val/* dummy */,
4769                                 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4770                                                 0, NULL)),
4771                                                 1000,
4772                                                 HBM_SCRUBBING_TIMEOUT_US);
4773                 if (rc) {
4774                         dev_err(hdev->dev, "waiting for idle timeout\n");
4775                         return -EIO;
4776                 }
4777
4778                 /* Scrub SRAM */
4779                 addr = prop->sram_user_base_address;
4780                 size = hdev->pldm ? 0x10000 :
4781                                 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4782                 val = 0x7777777777777777ull;
4783
4784                 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4785                 if (rc) {
4786                         dev_err(hdev->dev,
4787                                 "Failed to clear SRAM in mem scrub all\n");
4788                         return rc;
4789                 }
4790
4791                 mutex_lock(&gaudi->clk_gate_mutex);
4792                 hdev->asic_funcs->disable_clock_gating(hdev);
4793
4794                 /* Scrub HBM using all DMA channels in parallel */
4795                 rc = gaudi_hbm_scrubbing(hdev);
4796                 if (rc)
4797                         dev_err(hdev->dev,
4798                                 "Failed to clear HBM in mem scrub all\n");
4799
4800                 hdev->asic_funcs->set_clock_gating(hdev);
4801                 mutex_unlock(&gaudi->clk_gate_mutex);
4802         }
4803
4804         return rc;
4805 }
4806
4807 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4808                                 u32 queue_id, dma_addr_t *dma_handle,
4809                                 u16 *queue_len)
4810 {
4811         struct gaudi_device *gaudi = hdev->asic_specific;
4812         struct gaudi_internal_qman_info *q;
4813
4814         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4815                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4816                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4817                 return NULL;
4818         }
4819
4820         q = &gaudi->internal_qmans[queue_id];
4821         *dma_handle = q->pq_dma_addr;
4822         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4823
4824         return q->pq_kernel_addr;
4825 }
4826
4827 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4828                                 u16 len, u32 timeout, u64 *result)
4829 {
4830         struct gaudi_device *gaudi = hdev->asic_specific;
4831
4832         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4833                 if (result)
4834                         *result = 0;
4835                 return 0;
4836         }
4837
4838         if (!timeout)
4839                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4840
4841         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4842                                                 timeout, result);
4843 }
4844
4845 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4846 {
4847         struct packet_msg_prot *fence_pkt;
4848         dma_addr_t pkt_dma_addr;
4849         u32 fence_val, tmp, timeout_usec;
4850         dma_addr_t fence_dma_addr;
4851         u32 *fence_ptr;
4852         int rc;
4853
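        /* The queue test sends a single MSG_PROT packet that writes a known
         * fence value to host memory, then polls that location until the
         * value shows up or the timeout expires.
         */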
4854         if (hdev->pldm)
4855                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4856         else
4857                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4858
4859         fence_val = GAUDI_QMAN0_FENCE_VAL;
4860
4861         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4862                                                         &fence_dma_addr);
4863         if (!fence_ptr) {
4864                 dev_err(hdev->dev,
4865                         "Failed to allocate memory for H/W queue %d testing\n",
4866                         hw_queue_id);
4867                 return -ENOMEM;
4868         }
4869
4870         *fence_ptr = 0;
4871
4872         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4873                                         sizeof(struct packet_msg_prot),
4874                                         GFP_KERNEL, &pkt_dma_addr);
4875         if (!fence_pkt) {
4876                 dev_err(hdev->dev,
4877                         "Failed to allocate packet for H/W queue %d testing\n",
4878                         hw_queue_id);
4879                 rc = -ENOMEM;
4880                 goto free_fence_ptr;
4881         }
4882
4883         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4884         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4885         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4886
4887         fence_pkt->ctl = cpu_to_le32(tmp);
4888         fence_pkt->value = cpu_to_le32(fence_val);
4889         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4890
4891         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4892                                         sizeof(struct packet_msg_prot),
4893                                         pkt_dma_addr);
4894         if (rc) {
4895                 dev_err(hdev->dev,
4896                         "Failed to send fence packet to H/W queue %d\n",
4897                         hw_queue_id);
4898                 goto free_pkt;
4899         }
4900
4901         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4902                                         1000, timeout_usec, true);
4903
4904         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4905
4906         if (rc == -ETIMEDOUT) {
4907                 dev_err(hdev->dev,
4908                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4909                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4910                 rc = -EIO;
4911         }
4912
4913 free_pkt:
4914         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4915                                         pkt_dma_addr);
4916 free_fence_ptr:
4917         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4918                                         fence_dma_addr);
4919         return rc;
4920 }
4921
4922 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4923 {
4924         struct gaudi_device *gaudi = hdev->asic_specific;
4925
4926         /*
4927          * Check the capability here because send_cpu_message() won't update
4928          * the result value if the CPU queue capability is missing
4929          */
4930         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4931                 return 0;
4932
4933         return hl_fw_test_cpu_queue(hdev);
4934 }
4935
4936 static int gaudi_test_queues(struct hl_device *hdev)
4937 {
4938         int i, rc, ret_val = 0;
4939
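        /* Test every external H/W queue and then the CPU queue; a failure in
         * any single queue fails the whole test.
         */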
4940         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4941                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4942                         rc = gaudi_test_queue(hdev, i);
4943                         if (rc)
4944                                 ret_val = -EINVAL;
4945                 }
4946         }
4947
4948         rc = gaudi_test_cpu_queue(hdev);
4949         if (rc)
4950                 ret_val = -EINVAL;
4951
4952         return ret_val;
4953 }
4954
4955 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4956                 gfp_t mem_flags, dma_addr_t *dma_handle)
4957 {
4958         void *kernel_addr;
4959
4960         if (size > GAUDI_DMA_POOL_BLK_SIZE)
4961                 return NULL;
4962
4963         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4964
4965         /* Shift to the device's base physical address of host memory */
4966         if (kernel_addr)
4967                 *dma_handle += HOST_PHYS_BASE;
4968
4969         return kernel_addr;
4970 }
4971
4972 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4973                         dma_addr_t dma_addr)
4974 {
4975         /* Cancel the device's base physical address of host memory */
4976         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4977
4978         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4979 }
4980
4981 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4982                                         size_t size, dma_addr_t *dma_handle)
4983 {
4984         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4985 }
4986
4987 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4988                                                 size_t size, void *vaddr)
4989 {
4990         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4991 }
4992
4993 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
4994                         int nents, enum dma_data_direction dir)
4995 {
4996         struct scatterlist *sg;
4997         int i;
4998
4999         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5000                 return -ENOMEM;
5001
5002         /* Shift to the device's base physical address of host memory */
5003         for_each_sg(sgl, sg, nents, i)
5004                 sg->dma_address += HOST_PHYS_BASE;
5005
5006         return 0;
5007 }
5008
5009 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5010                         int nents, enum dma_data_direction dir)
5011 {
5012         struct scatterlist *sg;
5013         int i;
5014
5015         /* Cancel the device's base physical address of host memory */
5016         for_each_sg(sgl, sg, nents, i)
5017                 sg->dma_address -= HOST_PHYS_BASE;
5018
5019         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5020 }
5021
5022 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5023                                         struct sg_table *sgt)
5024 {
5025         struct scatterlist *sg, *sg_next_iter;
5026         u32 count, dma_desc_cnt;
5027         u64 len, len_next;
5028         dma_addr_t addr, addr_next;
5029
5030         dma_desc_cnt = 0;
5031
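        /* Count the LIN_DMA descriptors needed for this scatter-gather list.
         * Physically contiguous entries are merged as long as the combined
         * size doesn't exceed DMA_MAX_TRANSFER_SIZE.
         */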
5032         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5033
5034                 len = sg_dma_len(sg);
5035                 addr = sg_dma_address(sg);
5036
5037                 if (len == 0)
5038                         break;
5039
5040                 while ((count + 1) < sgt->nents) {
5041                         sg_next_iter = sg_next(sg);
5042                         len_next = sg_dma_len(sg_next_iter);
5043                         addr_next = sg_dma_address(sg_next_iter);
5044
5045                         if (len_next == 0)
5046                                 break;
5047
5048                         if ((addr + len == addr_next) &&
5049                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5050                                 len += len_next;
5051                                 count++;
5052                                 sg = sg_next_iter;
5053                         } else {
5054                                 break;
5055                         }
5056                 }
5057
5058                 dma_desc_cnt++;
5059         }
5060
5061         return dma_desc_cnt * sizeof(struct packet_lin_dma);
5062 }
5063
5064 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5065                                 struct hl_cs_parser *parser,
5066                                 struct packet_lin_dma *user_dma_pkt,
5067                                 u64 addr, enum dma_data_direction dir)
5068 {
5069         struct hl_userptr *userptr;
5070         int rc;
5071
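        /* Reuse the pinning if this range was already pinned for the job;
         * otherwise pin the user memory, DMA-map it and add it to the job's
         * userptr list.
         */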
5072         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5073                         parser->job_userptr_list, &userptr))
5074                 goto already_pinned;
5075
5076         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5077         if (!userptr)
5078                 return -ENOMEM;
5079
5080         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5081                                 userptr);
5082         if (rc)
5083                 goto free_userptr;
5084
5085         list_add_tail(&userptr->job_node, parser->job_userptr_list);
5086
5087         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5088                                         userptr->sgt->nents, dir);
5089         if (rc) {
5090                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5091                 goto unpin_memory;
5092         }
5093
5094         userptr->dma_mapped = true;
5095         userptr->dir = dir;
5096
5097 already_pinned:
5098         parser->patched_cb_size +=
5099                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5100
5101         return 0;
5102
5103 unpin_memory:
5104         list_del(&userptr->job_node);
5105         hl_unpin_host_memory(hdev, userptr);
5106 free_userptr:
5107         kfree(userptr);
5108         return rc;
5109 }
5110
5111 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5112                                 struct hl_cs_parser *parser,
5113                                 struct packet_lin_dma *user_dma_pkt,
5114                                 bool src_in_host)
5115 {
5116         enum dma_data_direction dir;
5117         bool skip_host_mem_pin = false, user_memset;
5118         u64 addr;
5119         int rc = 0;
5120
5121         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5122                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5123                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5124
5125         if (src_in_host) {
5126                 if (user_memset)
5127                         skip_host_mem_pin = true;
5128
5129                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5130                 dir = DMA_TO_DEVICE;
5131                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5132         } else {
5133                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5134                 dir = DMA_FROM_DEVICE;
5135                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5136                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5137                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5138         }
5139
5140         if (skip_host_mem_pin)
5141                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5142         else
5143                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5144                                                 addr, dir);
5145
5146         return rc;
5147 }
5148
5149 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5150                                 struct hl_cs_parser *parser,
5151                                 struct packet_lin_dma *user_dma_pkt)
5152 {
5153         bool src_in_host = false;
5154         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5155                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5156                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5157
5158         dev_dbg(hdev->dev, "DMA packet details:\n");
5159         dev_dbg(hdev->dev, "source == 0x%llx\n",
5160                                 le64_to_cpu(user_dma_pkt->src_addr));
5161         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5162         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5163
5164         /*
5165          * Special handling for DMA with size 0. Bypass all validations
5166          * because no transactions will be done except for WR_COMP, which
5167          * is not a security issue
5168          */
5169         if (!le32_to_cpu(user_dma_pkt->tsize)) {
5170                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5171                 return 0;
5172         }
5173
5174         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5175                 src_in_host = true;
5176
5177         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5178                                                 src_in_host);
5179 }
5180
5181 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5182                                         struct hl_cs_parser *parser,
5183                                         struct packet_load_and_exe *user_pkt)
5184 {
5185         u32 cfg;
5186
5187         cfg = le32_to_cpu(user_pkt->cfg);
5188
5189         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5190                 dev_err(hdev->dev,
5191                         "User not allowed to use Load and Execute\n");
5192                 return -EPERM;
5193         }
5194
5195         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5196
5197         return 0;
5198 }
5199
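     /*
      * Walk the user CB packet by packet: reject packet types the user is
      * not allowed to submit, and accumulate patched_cb_size - the size of
      * the kernel-owned CB that will be built from this one. With MMU
      * enabled LIN_DMA packets keep their size; without MMU each may expand
      * to several packets once the host memory is pinned.
      */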
5200 static int gaudi_validate_cb(struct hl_device *hdev,
5201                         struct hl_cs_parser *parser, bool is_mmu)
5202 {
5203         u32 cb_parsed_length = 0;
5204         int rc = 0;
5205
5206         parser->patched_cb_size = 0;
5207
5208         /* user_cb_size is more than 0 so the loop will always be executed */
5209         while (cb_parsed_length < parser->user_cb_size) {
5210                 enum packet_id pkt_id;
5211                 u16 pkt_size;
5212                 struct gaudi_packet *user_pkt;
5213
5214                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5215
5216                 pkt_id = (enum packet_id) (
5217                                 (le64_to_cpu(user_pkt->header) &
5218                                 PACKET_HEADER_PACKET_ID_MASK) >>
5219                                         PACKET_HEADER_PACKET_ID_SHIFT);
5220
5221                 if (!validate_packet_id(pkt_id)) {
5222                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5223                         rc = -EINVAL;
5224                         break;
5225                 }
5226
5227                 pkt_size = gaudi_packet_sizes[pkt_id];
5228                 cb_parsed_length += pkt_size;
5229                 if (cb_parsed_length > parser->user_cb_size) {
5230                         dev_err(hdev->dev,
5231                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5232                         rc = -EINVAL;
5233                         break;
5234                 }
5235
5236                 switch (pkt_id) {
5237                 case PACKET_MSG_PROT:
5238                         dev_err(hdev->dev,
5239                                 "User not allowed to use MSG_PROT\n");
5240                         rc = -EPERM;
5241                         break;
5242
5243                 case PACKET_CP_DMA:
5244                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5245                         rc = -EPERM;
5246                         break;
5247
5248                 case PACKET_STOP:
5249                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5250                         rc = -EPERM;
5251                         break;
5252
5253                 case PACKET_WREG_BULK:
5254                         dev_err(hdev->dev,
5255                                 "User not allowed to use WREG_BULK\n");
5256                         rc = -EPERM;
5257                         break;
5258
5259                 case PACKET_LOAD_AND_EXE:
5260                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5261                                 (struct packet_load_and_exe *) user_pkt);
5262                         break;
5263
5264                 case PACKET_LIN_DMA:
5265                         parser->contains_dma_pkt = true;
5266                         if (is_mmu)
5267                                 parser->patched_cb_size += pkt_size;
5268                         else
5269                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5270                                         (struct packet_lin_dma *) user_pkt);
5271                         break;
5272
5273                 case PACKET_WREG_32:
5274                 case PACKET_MSG_LONG:
5275                 case PACKET_MSG_SHORT:
5276                 case PACKET_REPEAT:
5277                 case PACKET_FENCE:
5278                 case PACKET_NOP:
5279                 case PACKET_ARB_POINT:
5280                         parser->patched_cb_size += pkt_size;
5281                         break;
5282
5283                 default:
5284                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5285                                 pkt_id);
5286                         rc = -EINVAL;
5287                         break;
5288                 }
5289
5290                 if (rc)
5291                         break;
5292         }
5293
5294         /*
5295          * The new CB should have space at the end for two MSG_PROT packets:
5296          * 1. A packet that will act as a completion packet
5297          * 2. A packet that will generate MSI-X interrupt
5298          */
5299         if (parser->completion)
5300                 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5301
5302         return rc;
5303 }
5304
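     /*
      * Expand one user LIN_DMA packet into a packet per DMA-mapped
      * scatter-gather entry of the pinned host memory. Physically
      * contiguous entries are coalesced as long as the combined length
      * stays within DMA_MAX_TRANSFER_SIZE. The engine-barrier bit is kept
      * only on the first generated packet, and the user's WR_COMP setting
      * is restored only on the last one, so a single completion is issued.
      */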
5305 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5306                                 struct hl_cs_parser *parser,
5307                                 struct packet_lin_dma *user_dma_pkt,
5308                                 struct packet_lin_dma *new_dma_pkt,
5309                                 u32 *new_dma_pkt_size)
5310 {
5311         struct hl_userptr *userptr;
5312         struct scatterlist *sg, *sg_next_iter;
5313         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5314         u64 len, len_next;
5315         dma_addr_t dma_addr, dma_addr_next;
5316         u64 device_memory_addr, addr;
5317         enum dma_data_direction dir;
5318         struct sg_table *sgt;
5319         bool src_in_host = false;
5320         bool skip_host_mem_pin = false;
5321         bool user_memset;
5322
5323         ctl = le32_to_cpu(user_dma_pkt->ctl);
5324
5325         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5326                 src_in_host = true;
5327
5328         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5329                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5330
5331         if (src_in_host) {
5332                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5333                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5334                 dir = DMA_TO_DEVICE;
5335                 if (user_memset)
5336                         skip_host_mem_pin = true;
5337         } else {
5338                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5339                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5340                 dir = DMA_FROM_DEVICE;
5341         }
5342
5343         if ((!skip_host_mem_pin) &&
5344                 (!hl_userptr_is_pinned(hdev, addr,
5345                                         le32_to_cpu(user_dma_pkt->tsize),
5346                                         parser->job_userptr_list, &userptr))) {
5347                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5348                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5349                 return -EFAULT;
5350         }
5351
5352         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5353                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5354                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5355                 return 0;
5356         }
5357
5358         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5359
5360         sgt = userptr->sgt;
5361         dma_desc_cnt = 0;
5362
5363         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5364                 len = sg_dma_len(sg);
5365                 dma_addr = sg_dma_address(sg);
5366
5367                 if (len == 0)
5368                         break;
5369
5370                 while ((count + 1) < sgt->nents) {
5371                         sg_next_iter = sg_next(sg);
5372                         len_next = sg_dma_len(sg_next_iter);
5373                         dma_addr_next = sg_dma_address(sg_next_iter);
5374
5375                         if (len_next == 0)
5376                                 break;
5377
5378                         if ((dma_addr + len == dma_addr_next) &&
5379                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5380                                 len += len_next;
5381                                 count++;
5382                                 sg = sg_next_iter;
5383                         } else {
5384                                 break;
5385                         }
5386                 }
5387
5388                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5389                 if (likely(dma_desc_cnt))
5390                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5391                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5392                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5393                 new_dma_pkt->tsize = cpu_to_le32(len);
5394
5395                 if (dir == DMA_TO_DEVICE) {
5396                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5397                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5398                 } else {
5399                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5400                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5401                 }
5402
5403                 if (!user_memset)
5404                         device_memory_addr += len;
5405                 dma_desc_cnt++;
5406                 new_dma_pkt++;
5407         }
5408
5409         if (!dma_desc_cnt) {
5410                 dev_err(hdev->dev,
5411                         "Got 0 SG entries when patching DMA packet\n");
5412                 return -EFAULT;
5413         }
5414
5415         /* Fix the last dma packet - wrcomp must be as user set it */
5416         new_dma_pkt--;
5417         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5418
5419         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5420
5421         return 0;
5422 }
5423
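     /*
      * Second pass over the user CB (no-MMU path only): copy each packet
      * into the kernel-owned patched CB, expanding LIN_DMA packets through
      * gaudi_patch_dma_packet() and rejecting privileged packet types once
      * more.
      */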
5424 static int gaudi_patch_cb(struct hl_device *hdev,
5425                                 struct hl_cs_parser *parser)
5426 {
5427         u32 cb_parsed_length = 0;
5428         u32 cb_patched_cur_length = 0;
5429         int rc = 0;
5430
5431         /* user_cb_size is more than 0 so the loop will always be executed */
5432         while (cb_parsed_length < parser->user_cb_size) {
5433                 enum packet_id pkt_id;
5434                 u16 pkt_size;
5435                 u32 new_pkt_size = 0;
5436                 struct gaudi_packet *user_pkt, *kernel_pkt;
5437
5438                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5439                 kernel_pkt = parser->patched_cb->kernel_address +
5440                                         cb_patched_cur_length;
5441
5442                 pkt_id = (enum packet_id) (
5443                                 (le64_to_cpu(user_pkt->header) &
5444                                 PACKET_HEADER_PACKET_ID_MASK) >>
5445                                         PACKET_HEADER_PACKET_ID_SHIFT);
5446
5447                 if (!validate_packet_id(pkt_id)) {
5448                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5449                         rc = -EINVAL;
5450                         break;
5451                 }
5452
5453                 pkt_size = gaudi_packet_sizes[pkt_id];
5454                 cb_parsed_length += pkt_size;
5455                 if (cb_parsed_length > parser->user_cb_size) {
5456                         dev_err(hdev->dev,
5457                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5458                         rc = -EINVAL;
5459                         break;
5460                 }
5461
5462                 switch (pkt_id) {
5463                 case PACKET_LIN_DMA:
5464                         rc = gaudi_patch_dma_packet(hdev, parser,
5465                                         (struct packet_lin_dma *) user_pkt,
5466                                         (struct packet_lin_dma *) kernel_pkt,
5467                                         &new_pkt_size);
5468                         cb_patched_cur_length += new_pkt_size;
5469                         break;
5470
5471                 case PACKET_MSG_PROT:
5472                         dev_err(hdev->dev,
5473                                 "User not allowed to use MSG_PROT\n");
5474                         rc = -EPERM;
5475                         break;
5476
5477                 case PACKET_CP_DMA:
5478                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5479                         rc = -EPERM;
5480                         break;
5481
5482                 case PACKET_STOP:
5483                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5484                         rc = -EPERM;
5485                         break;
5486
5487                 case PACKET_WREG_32:
5488                 case PACKET_WREG_BULK:
5489                 case PACKET_MSG_LONG:
5490                 case PACKET_MSG_SHORT:
5491                 case PACKET_REPEAT:
5492                 case PACKET_FENCE:
5493                 case PACKET_NOP:
5494                 case PACKET_ARB_POINT:
5495                 case PACKET_LOAD_AND_EXE:
5496                         memcpy(kernel_pkt, user_pkt, pkt_size);
5497                         cb_patched_cur_length += pkt_size;
5498                         break;
5499
5500                 default:
5501                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5502                                 pkt_id);
5503                         rc = -EINVAL;
5504                         break;
5505                 }
5506
5507                 if (rc)
5508                         break;
5509         }
5510
5511         return rc;
5512 }
5513
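     /*
      * CS parsing when the MMU is enabled: allocate a kernel CB, copy the
      * user CB into it (leaving room for the two trailing MSG_PROT packets
      * when a completion is required) and run the validation on the copy,
      * so the packets cannot be changed by the user after they were
      * checked.
      */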
5514 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5515                 struct hl_cs_parser *parser)
5516 {
5517         u64 patched_cb_handle;
5518         u32 patched_cb_size;
5519         struct hl_cb *user_cb;
5520         int rc;
5521
5522         /*
5523          * The new CB should have space at the end for two MSG_PROT packets:
5524          * 1. A packet that will act as a completion packet
5525          * 2. A packet that will generate MSI interrupt
5526          */
5527         if (parser->completion)
5528                 parser->patched_cb_size = parser->user_cb_size +
5529                                 sizeof(struct packet_msg_prot) * 2;
5530         else
5531                 parser->patched_cb_size = parser->user_cb_size;
5532
5533         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5534                                 parser->patched_cb_size, false, false,
5535                                 &patched_cb_handle);
5536
5537         if (rc) {
5538                 dev_err(hdev->dev,
5539                         "Failed to allocate patched CB for DMA CS %d\n",
5540                         rc);
5541                 return rc;
5542         }
5543
5544         patched_cb_handle >>= PAGE_SHIFT;
5545         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5546                                 (u32) patched_cb_handle);
5547         /* hl_cb_get should never fail */
5548         if (!parser->patched_cb) {
5549                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5550                         (u32) patched_cb_handle);
5551                 rc = -EFAULT;
5552                 goto out;
5553         }
5554
5555         /*
5556          * The check that parser->user_cb_size <= parser->user_cb->size was done
5557          * in validate_queue_index().
5558          */
5559         memcpy(parser->patched_cb->kernel_address,
5560                 parser->user_cb->kernel_address,
5561                 parser->user_cb_size);
5562
5563         patched_cb_size = parser->patched_cb_size;
5564
5565         /* Validate patched CB instead of user CB */
5566         user_cb = parser->user_cb;
5567         parser->user_cb = parser->patched_cb;
5568         rc = gaudi_validate_cb(hdev, parser, true);
5569         parser->user_cb = user_cb;
5570
5571         if (rc) {
5572                 hl_cb_put(parser->patched_cb);
5573                 goto out;
5574         }
5575
5576         if (patched_cb_size != parser->patched_cb_size) {
5577                 dev_err(hdev->dev, "user CB size mismatch\n");
5578                 hl_cb_put(parser->patched_cb);
5579                 rc = -EINVAL;
5580                 goto out;
5581         }
5582
5583 out:
5584         /*
5585          * Always call cb destroy here because we still have 1 reference
5586          * to it by calling cb_get earlier. After the job is completed,
5587          * cb_put will release it, but here we want to remove it from the
5588          * idr
5589          */
5590         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5591                                         patched_cb_handle << PAGE_SHIFT);
5592
5593         return rc;
5594 }
5595
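     /*
      * CS parsing when the MMU is disabled: validate the user CB, allocate
      * a patched CB of the size computed during validation and fill it via
      * gaudi_patch_cb(). On failure, the userptr list created while
      * validating is torn down here.
      */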
5596 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5597                 struct hl_cs_parser *parser)
5598 {
5599         u64 patched_cb_handle;
5600         int rc;
5601
5602         rc = gaudi_validate_cb(hdev, parser, false);
5603
5604         if (rc)
5605                 goto free_userptr;
5606
5607         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5608                                 parser->patched_cb_size, false, false,
5609                                 &patched_cb_handle);
5610         if (rc) {
5611                 dev_err(hdev->dev,
5612                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5613                 goto free_userptr;
5614         }
5615
5616         patched_cb_handle >>= PAGE_SHIFT;
5617         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5618                                 (u32) patched_cb_handle);
5619         /* hl_cb_get should never fail here */
5620         if (!parser->patched_cb) {
5621                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5622                                 (u32) patched_cb_handle);
5623                 rc = -EFAULT;
5624                 goto out;
5625         }
5626
5627         rc = gaudi_patch_cb(hdev, parser);
5628
5629         if (rc)
5630                 hl_cb_put(parser->patched_cb);
5631
5632 out:
5633         /*
5634          * Always call cb destroy here because we still have 1 reference
5635          * to it by calling cb_get earlier. After the job is completed,
5636          * cb_put will release it, but here we want to remove it from the
5637          * idr
5638          */
5639         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5640                                 patched_cb_handle << PAGE_SHIFT);
5641
5642 free_userptr:
5643         if (rc)
5644                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5645         return rc;
5646 }
5647
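     /*
      * Parsing for internal (non-external) queues: no CB patching is done.
      * Only verify that the target NIC queue, if any, is actually enabled
      * and that the CB address lies within SRAM, DRAM or the PMMU virtual
      * range.
      */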
5648 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5649                                         struct hl_cs_parser *parser)
5650 {
5651         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5652         struct gaudi_device *gaudi = hdev->asic_specific;
5653         u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5654                 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5655
5656         if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5657                         (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5658                         (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5659                 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5660                                 parser->hw_queue_id);
5661                 return -EINVAL;
5662         }
5663
5664         /* For internal queue jobs just check if CB address is valid */
5665         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5666                                         parser->user_cb_size,
5667                                         asic_prop->sram_user_base_address,
5668                                         asic_prop->sram_end_address))
5669                 return 0;
5670
5671         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5672                                         parser->user_cb_size,
5673                                         asic_prop->dram_user_base_address,
5674                                         asic_prop->dram_end_address))
5675                 return 0;
5676
5677         /* PMMU and HPMMU addresses are equal, check only one of them */
5678         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5679                                         parser->user_cb_size,
5680                                         asic_prop->pmmu.start_addr,
5681                                         asic_prop->pmmu.end_addr))
5682                 return 0;
5683
5684         dev_err(hdev->dev,
5685                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5686                 parser->user_cb, parser->user_cb_size);
5687
5688         return -EFAULT;
5689 }
5690
5691 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5692 {
5693         struct gaudi_device *gaudi = hdev->asic_specific;
5694
5695         if (parser->queue_type == QUEUE_TYPE_INT)
5696                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5697
5698         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5699                 return gaudi_parse_cb_mmu(hdev, parser);
5700         else
5701                 return gaudi_parse_cb_no_mmu(hdev, parser);
5702 }
5703
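     /*
      * Append the two driver-owned MSG_PROT packets at the tail of an
      * external queue CB:
      *
      *   | user packets ... | MSG_PROT -> CQ entry | MSG_PROT -> MSI |
      *
      * The first writes the completion value to the CQ, the second writes
      * to the MSI interrupt register (vector 0 unless multi-MSI mode is
      * enabled).
      */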
5704 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5705                                         void *kernel_address, u32 len,
5706                                         u64 cq_addr, u32 cq_val, u32 msi_vec,
5707                                         bool eb)
5708 {
5709         struct gaudi_device *gaudi = hdev->asic_specific;
5710         struct packet_msg_prot *cq_pkt;
5711         u32 tmp;
5712
5713         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5714
5715         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5716         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5717
5718         if (eb)
5719                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5720
5721         cq_pkt->ctl = cpu_to_le32(tmp);
5722         cq_pkt->value = cpu_to_le32(cq_val);
5723         cq_pkt->addr = cpu_to_le64(cq_addr);
5724
5725         cq_pkt++;
5726
5727         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5728         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5729         cq_pkt->ctl = cpu_to_le32(tmp);
5730         cq_pkt->value = cpu_to_le32(1);
5731
5732         if (!gaudi->multi_msi_mode)
5733                 msi_vec = 0;
5734
5735         cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5736 }
5737
5738 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5739 {
5740         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5741 }
5742
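     /*
      * Fill a device memory range with a 64-bit value by submitting a
      * single memset-mode LIN_DMA packet on QMAN0 of DMA channel 0. The
      * DMA0 error cause register is cleared beforehand (during init only)
      * and checked after the job, so a stale error is not reported as a
      * failure of this transfer.
      */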
5743 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5744                                         u32 size, u64 val)
5745 {
5746         struct packet_lin_dma *lin_dma_pkt;
5747         struct hl_cs_job *job;
5748         u32 cb_size, ctl, err_cause;
5749         struct hl_cb *cb;
5750         u64 id;
5751         int rc;
5752
5753         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5754         if (!cb)
5755                 return -EFAULT;
5756
5757         lin_dma_pkt = cb->kernel_address;
5758         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5759         cb_size = sizeof(*lin_dma_pkt);
5760
5761         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5762         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5763         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5764         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5765         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5766
5767         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5768         lin_dma_pkt->src_addr = cpu_to_le64(val);
5769         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5770         lin_dma_pkt->tsize = cpu_to_le32(size);
5771
5772         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5773         if (!job) {
5774                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5775                 rc = -ENOMEM;
5776                 goto release_cb;
5777         }
5778
5779         /* Verify DMA is OK */
5780         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5781         if (err_cause && !hdev->init_done) {
5782                 dev_dbg(hdev->dev,
5783                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5784                         err_cause);
5785                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5786         }
5787
5788         job->id = 0;
5789         job->user_cb = cb;
5790         atomic_inc(&job->user_cb->cs_cnt);
5791         job->user_cb_size = cb_size;
5792         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5793         job->patched_cb = job->user_cb;
5794         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5795
5796         hl_debugfs_add_job(hdev, job);
5797
5798         rc = gaudi_send_job_on_qman0(hdev, job);
5799         hl_debugfs_remove_job(hdev, job);
5800         kfree(job);
5801         atomic_dec(&cb->cs_cnt);
5802
5803         /* Verify DMA is OK */
5804         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5805         if (err_cause) {
5806                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5807                 rc = -EIO;
5808                 if (!hdev->init_done) {
5809                         dev_dbg(hdev->dev,
5810                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5811                                 err_cause);
5812                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5813                 }
5814         }
5815
5816 release_cb:
5817         id = cb->id;
5818         hl_cb_put(cb);
5819         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5820
5821         return rc;
5822 }
5823
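     /*
      * Write the same value to num_regs consecutive registers by building
      * a CB with one MSG_LONG packet per register and executing it on
      * QMAN0. The CB is limited to 2MB, which bounds how many registers a
      * single call can cover.
      */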
5824 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5825                                         u32 num_regs, u32 val)
5826 {
5827         struct packet_msg_long *pkt;
5828         struct hl_cs_job *job;
5829         u32 cb_size, ctl;
5830         struct hl_cb *cb;
5831         int i, rc;
5832
5833         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5834
5835         if (cb_size > SZ_2M) {
5836                 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M >> 20);
5837                 return -ENOMEM;
5838         }
5839
5840         cb = hl_cb_kernel_create(hdev, cb_size, false);
5841         if (!cb)
5842                 return -EFAULT;
5843
5844         pkt = cb->kernel_address;
5845
5846         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5847         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5848         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5849         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5850         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5851
5852         for (i = 0; i < num_regs ; i++, pkt++) {
5853                 pkt->ctl = cpu_to_le32(ctl);
5854                 pkt->value = cpu_to_le32(val);
5855                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5856         }
5857
5858         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5859         if (!job) {
5860                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5861                 rc = -ENOMEM;
5862                 goto release_cb;
5863         }
5864
5865         job->id = 0;
5866         job->user_cb = cb;
5867         atomic_inc(&job->user_cb->cs_cnt);
5868         job->user_cb_size = cb_size;
5869         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5870         job->patched_cb = job->user_cb;
5871         job->job_cb_size = cb_size;
5872
5873         hl_debugfs_add_job(hdev, job);
5874
5875         rc = gaudi_send_job_on_qman0(hdev, job);
5876         hl_debugfs_remove_job(hdev, job);
5877         kfree(job);
5878         atomic_dec(&cb->cs_cnt);
5879
5880 release_cb:
5881         hl_cb_put(cb);
5882         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5883
5884         return rc;
5885 }
5886
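     /*
      * Same register memset, but when a live compute context exists the CB
      * is not executed immediately. Instead it is added to the context's
      * pending CB list, to be submitted later on the given external queue.
      * Without a compute context the registers are simply set directly.
      */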
5887 static int gaudi_schedule_register_memset(struct hl_device *hdev,
5888                 u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
5889 {
5890         struct hl_ctx *ctx;
5891         struct hl_pending_cb *pending_cb;
5892         struct packet_msg_long *pkt;
5893         u32 cb_size, ctl;
5894         struct hl_cb *cb;
5895         int i, rc;
5896
5897         mutex_lock(&hdev->fpriv_list_lock);
5898         ctx = hdev->compute_ctx;
5899
5900         /* If no compute context is available or the context is going down,
5901          * memset the registers directly
5902          */
5903         if (!ctx || kref_read(&ctx->refcount) == 0) {
5904                 rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
5905                 mutex_unlock(&hdev->fpriv_list_lock);
5906                 return rc;
5907         }
5908
5909         mutex_unlock(&hdev->fpriv_list_lock);
5910
5911         cb_size = (sizeof(*pkt) * num_regs) +
5912                         sizeof(struct packet_msg_prot) * 2;
5913
5914         if (cb_size > SZ_2M) {
5915                 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M >> 20);
5916                 return -ENOMEM;
5917         }
5918
5919         pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
5920         if (!pending_cb)
5921                 return -ENOMEM;
5922
5923         cb = hl_cb_kernel_create(hdev, cb_size, false);
5924         if (!cb) {
5925                 kfree(pending_cb);
5926                 return -EFAULT;
5927         }
5928
5929         pkt = cb->kernel_address;
5930
5931         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5932         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5933         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5934         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5935         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5936
5937         for (i = 0; i < num_regs ; i++, pkt++) {
5938                 pkt->ctl = cpu_to_le32(ctl);
5939                 pkt->value = cpu_to_le32(val);
5940                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5941         }
5942
5943         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5944
5945         pending_cb->cb = cb;
5946         pending_cb->cb_size = cb_size;
5947         /* The queue ID MUST be an external queue ID. Otherwise, we will
5948          * have undefined behavior
5949          */
5950         pending_cb->hw_queue_id = hw_queue_id;
5951
5952         spin_lock(&ctx->pending_cb_lock);
5953         list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
5954         spin_unlock(&ctx->pending_cb_lock);
5955
5956         return 0;
5957 }
5958
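     /*
      * Clear the sync manager SOB and monitor-status registers that user
      * jobs may have left set. In the west-south block only the entries
      * from the first user-available SOB/monitor onward are cleared, so
      * the lower, driver-reserved entries are left untouched.
      */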
5959 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5960 {
5961         u64 base_addr;
5962         u32 num_regs;
5963         int rc;
5964
5965         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5966         num_regs = NUM_OF_SOB_IN_BLOCK;
5967         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5968         if (rc) {
5969                 dev_err(hdev->dev, "failed resetting SM registers\n");
5970                 return rc;
5971         }
5972
5973         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5974         num_regs = NUM_OF_SOB_IN_BLOCK;
5975         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5976         if (rc) {
5977                 dev_err(hdev->dev, "failed resetting SM registers\n");
5978                 return rc;
5979         }
5980
5981         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5982         num_regs = NUM_OF_SOB_IN_BLOCK;
5983         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5984         if (rc) {
5985                 dev_err(hdev->dev, "failed resetting SM registers\n");
5986                 return rc;
5987         }
5988
5989         base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5990         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5991         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5992         if (rc) {
5993                 dev_err(hdev->dev, "failed resetting SM registers\n");
5994                 return rc;
5995         }
5996
5997         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5998         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5999         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6000         if (rc) {
6001                 dev_err(hdev->dev, "failed resetting SM registers\n");
6002                 return rc;
6003         }
6004
6005         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
6006         num_regs = NUM_OF_MONITORS_IN_BLOCK;
6007         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6008         if (rc) {
6009                 dev_err(hdev->dev, "failed resetting SM registers\n");
6010                 return rc;
6011         }
6012
6013         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6014                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
6015         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
6016         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6017         if (rc) {
6018                 dev_err(hdev->dev, "failed resetting SM registers\n");
6019                 return rc;
6020         }
6021
6022         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
6023                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
6024         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6025         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6026         if (rc) {
6027                 dev_err(hdev->dev, "failed resetting SM registers\n");
6028                 return rc;
6029         }
6030
6031         return 0;
6032 }
6033
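     /*
      * Re-point each DMA channel's WR_COMP at its own sync object and
      * restore WR_AWUSER_31_11 on the compute DMA engines (2-7), since the
      * user is allowed to modify the latter for SRAM reduction.
      */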
6034 static void gaudi_restore_dma_registers(struct hl_device *hdev)
6035 {
6036         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6037                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6038         int i;
6039
6040         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6041                 u64 sob_addr = CFG_BASE +
6042                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6043                                 (i * sob_delta);
6044                 u32 dma_offset = i * DMA_CORE_OFFSET;
6045
6046                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6047                                 lower_32_bits(sob_addr));
6048                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6049                                 upper_32_bits(sob_addr));
6050                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6051
6052                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
6053                  * modified by the user for SRAM reduction
6054                  */
6055                 if (i > 1)
6056                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6057                                                                 0x00000001);
6058         }
6059 }
6060
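     /*
      * Clear ARB_CFG_0 of every DMA, MME master, TPC and NIC QMAN,
      * dropping whatever arbitration setup the previous context applied.
      */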
6061 static void gaudi_restore_qm_registers(struct hl_device *hdev)
6062 {
6063         u32 qman_offset;
6064         int i;
6065
6066         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6067                 qman_offset = i * DMA_QMAN_OFFSET;
6068                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6069         }
6070
6071         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6072                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6073                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6074         }
6075
6076         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6077                 qman_offset = i * TPC_QMAN_OFFSET;
6078                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6079         }
6080
6081         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6082                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6083                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6084                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6085         }
6086 }
6087
6088 static int gaudi_restore_user_registers(struct hl_device *hdev)
6089 {
6090         int rc;
6091
6092         rc = gaudi_restore_sm_registers(hdev);
6093         if (rc)
6094                 return rc;
6095
6096         gaudi_restore_dma_registers(hdev);
6097         gaudi_restore_qm_registers(hdev);
6098
6099         return 0;
6100 }
6101
6102 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6103 {
6104         return gaudi_restore_user_registers(hdev);
6105 }
6106
6107 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6108 {
6109         struct asic_fixed_properties *prop = &hdev->asic_prop;
6110         struct gaudi_device *gaudi = hdev->asic_specific;
6111         u64 addr = prop->mmu_pgt_addr;
6112         u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6113
6114         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6115                 return 0;
6116
6117         return gaudi_memset_device_memory(hdev, addr, size, 0);
6118 }
6119
6120 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6121 {
6122
6123 }
6124
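     /*
      * debugfs access helpers. The 32/64-bit read and write variants below
      * dispatch by address range: CFG space goes through the register
      * accessors (refused while clock gating covers the debugfs range),
      * SRAM through its PCI BAR, HBM through the movable HBM BAR, and host
      * physical memory is touched directly when no IOMMU is present.
      */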
6125 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6126                         bool user_address, u32 *val)
6127 {
6128         struct asic_fixed_properties *prop = &hdev->asic_prop;
6129         struct gaudi_device *gaudi = hdev->asic_specific;
6130         u64 hbm_bar_addr, host_phys_end;
6131         int rc = 0;
6132
6133         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6134
6135         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6136
6137                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6138                                 (hdev->clock_gating_mask &
6139                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6140
6141                         dev_err_ratelimited(hdev->dev,
6142                                 "Can't read register - clock gating is enabled!\n");
6143                         rc = -EFAULT;
6144                 } else {
6145                         *val = RREG32(addr - CFG_BASE);
6146                 }
6147
6148         } else if ((addr >= SRAM_BASE_ADDR) &&
6149                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6150                 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6151                                 (addr - SRAM_BASE_ADDR));
6152         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6153                 u64 bar_base_addr = DRAM_PHYS_BASE +
6154                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6155
6156                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6157                 if (hbm_bar_addr != U64_MAX) {
6158                         *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6159                                                 (addr - bar_base_addr));
6160
6161                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6162                                                 hbm_bar_addr);
6163                 }
6164                 if (hbm_bar_addr == U64_MAX)
6165                         rc = -EIO;
6166         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6167                         user_address && !iommu_present(&pci_bus_type)) {
6168                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6169         } else {
6170                 rc = -EFAULT;
6171         }
6172
6173         return rc;
6174 }
6175
6176 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6177                         bool user_address, u32 val)
6178 {
6179         struct asic_fixed_properties *prop = &hdev->asic_prop;
6180         struct gaudi_device *gaudi = hdev->asic_specific;
6181         u64 hbm_bar_addr, host_phys_end;
6182         int rc = 0;
6183
6184         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6185
6186         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6187
6188                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6189                                 (hdev->clock_gating_mask &
6190                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6191
6192                         dev_err_ratelimited(hdev->dev,
6193                                 "Can't write register - clock gating is enabled!\n");
6194                         rc = -EFAULT;
6195                 } else {
6196                         WREG32(addr - CFG_BASE, val);
6197                 }
6198
6199         } else if ((addr >= SRAM_BASE_ADDR) &&
6200                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6201                 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6202                                         (addr - SRAM_BASE_ADDR));
6203         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6204                 u64 bar_base_addr = DRAM_PHYS_BASE +
6205                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6206
6207                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6208                 if (hbm_bar_addr != U64_MAX) {
6209                         writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6210                                                 (addr - bar_base_addr));
6211
6212                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6213                                                 hbm_bar_addr);
6214                 }
6215                 if (hbm_bar_addr == U64_MAX)
6216                         rc = -EIO;
6217         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6218                         user_address && !iommu_present(&pci_bus_type)) {
6219                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6220         } else {
6221                 rc = -EFAULT;
6222         }
6223
6224         return rc;
6225 }
6226
6227 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6228                                 bool user_address, u64 *val)
6229 {
6230         struct asic_fixed_properties *prop = &hdev->asic_prop;
6231         struct gaudi_device *gaudi = hdev->asic_specific;
6232         u64 hbm_bar_addr, host_phys_end;
6233         int rc = 0;
6234
6235         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6236
6237         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6238
6239                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6240                                 (hdev->clock_gating_mask &
6241                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6242
6243                         dev_err_ratelimited(hdev->dev,
6244                                 "Can't read register - clock gating is enabled!\n");
6245                         rc = -EFAULT;
6246                 } else {
6247                         u32 val_l = RREG32(addr - CFG_BASE);
6248                         u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6249
6250                         *val = (((u64) val_h) << 32) | val_l;
6251                 }
6252
6253         } else if ((addr >= SRAM_BASE_ADDR) &&
6254                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6255                 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6256                                 (addr - SRAM_BASE_ADDR));
6257         } else if (addr <=
6258                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6259                 u64 bar_base_addr = DRAM_PHYS_BASE +
6260                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6261
6262                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6263                 if (hbm_bar_addr != U64_MAX) {
6264                         *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6265                                                 (addr - bar_base_addr));
6266
6267                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6268                                                 hbm_bar_addr);
6269                 }
6270                 if (hbm_bar_addr == U64_MAX)
6271                         rc = -EIO;
6272         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6273                         user_address && !iommu_present(&pci_bus_type)) {
6274                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6275         } else {
6276                 rc = -EFAULT;
6277         }
6278
6279         return rc;
6280 }
6281
6282 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6283                                 bool user_address, u64 val)
6284 {
6285         struct asic_fixed_properties *prop = &hdev->asic_prop;
6286         struct gaudi_device *gaudi = hdev->asic_specific;
6287         u64 hbm_bar_addr, host_phys_end;
6288         int rc = 0;
6289
6290         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6291
6292         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6293
6294                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6295                                 (hdev->clock_gating_mask &
6296                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6297
6298                         dev_err_ratelimited(hdev->dev,
6299                                 "Can't write register - clock gating is enabled!\n");
6300                         rc = -EFAULT;
6301                 } else {
6302                         WREG32(addr - CFG_BASE, lower_32_bits(val));
6303                         WREG32(addr + sizeof(u32) - CFG_BASE,
6304                                 upper_32_bits(val));
6305                 }
6306
6307         } else if ((addr >= SRAM_BASE_ADDR) &&
6308                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6309                 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6310                                         (addr - SRAM_BASE_ADDR));
6311         } else if (addr <=
6312                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6313                 u64 bar_base_addr = DRAM_PHYS_BASE +
6314                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6315
6316                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6317                 if (hbm_bar_addr != U64_MAX) {
6318                         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6319                                                 (addr - bar_base_addr));
6320
6321                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6322                                                 hbm_bar_addr);
6323                 }
6324                 if (hbm_bar_addr == U64_MAX)
6325                         rc = -EIO;
6326         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6327                         user_address && !iommu_present(&pci_bus_type)) {
6328                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6329         } else {
6330                 rc = -EFAULT;
6331         }
6332
6333         return rc;
6334 }
6335
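     /*
      * Run one transfer directly on a DMA core, bypassing its QMAN:
      * program source, destination and size, commit in linear mode, then
      * poll STS0 until the engine is idle. Used by the debugfs DMA read
      * below.
      */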
6336 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6337                                         u32 size_to_dma, dma_addr_t dma_addr)
6338 {
6339         u32 err_cause, val;
6340         u64 dma_offset;
6341         int rc;
6342
6343         dma_offset = dma_id * DMA_CORE_OFFSET;
6344
6345         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6346         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6347         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6348         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6349         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6350         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6351                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6352
6353         rc = hl_poll_timeout(
6354                 hdev,
6355                 mmDMA0_CORE_STS0 + dma_offset,
6356                 val,
6357                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6358                 0,
6359                 1000000);
6360
6361         if (rc) {
6362                 dev_err(hdev->dev,
6363                         "DMA %d timed out while reading 0x%llx\n",
6364                         dma_id, addr);
6365                 return -EIO;
6366         }
6367
6368         /* Verify DMA is OK */
6369         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6370         if (err_cause) {
6371                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6372                 dev_dbg(hdev->dev,
6373                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6374                         err_cause);
6375                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6376
6377                 return -EIO;
6378         }
6379
6380         return 0;
6381 }
6382
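     /*
      * Read an arbitrary device address range into a host buffer for
      * debugfs, in 2MB chunks, through a temporary coherent bounce buffer.
      * One of the two PCI DMA channels must be idle; its QMAN CPs are
      * stopped and the core protection bit is set for the duration of the
      * transfer so the bounce buffer needs no MMU mapping (see the TODOs
      * below).
      */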
6383 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6384                                 void *blob_addr)
6385 {
6386         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6387         struct gaudi_device *gaudi = hdev->asic_specific;
6388         u64 dma_offset, qm_offset;
6389         dma_addr_t dma_addr;
6390         void *kernel_addr;
6391         bool is_eng_idle;
6392         int rc = 0, dma_id;
6393
6394         kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6395                                                 hdev, SZ_2M,
6396                                                 &dma_addr,
6397                                                 GFP_KERNEL | __GFP_ZERO);
6398
6399         if (!kernel_addr)
6400                 return -ENOMEM;
6401
6402         mutex_lock(&gaudi->clk_gate_mutex);
6403
6404         hdev->asic_funcs->disable_clock_gating(hdev);
6405
6406         hdev->asic_funcs->hw_queues_lock(hdev);
6407
6408         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6409         dma_offset = dma_id * DMA_CORE_OFFSET;
6410         qm_offset = dma_id * DMA_QMAN_OFFSET;
6411         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6412         is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6413
6414         if (!is_eng_idle) {
6415                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6416                 dma_offset = dma_id * DMA_CORE_OFFSET;
6417                 qm_offset = dma_id * DMA_QMAN_OFFSET;
6418                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6419                 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6420
6421                 if (!is_eng_idle) {
6422                         dev_err_ratelimited(hdev->dev,
6423                                 "Can't read via DMA because it is BUSY\n");
6424                         rc = -EAGAIN;
6425                         goto out;
6426                 }
6427         }
6428
6429         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6430         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6431                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6432
6433         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6434          * using the compute ctx ASID, if one exists. If not, use the kernel ctx
6435          * ASID
6436          */
6437         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6438
6439         /* Verify DMA is OK */
6440         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6441         if (err_cause) {
6442                 dev_dbg(hdev->dev,
6443                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6444                         err_cause);
6445                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6446         }
6447
6448         pos = 0;
6449         size_left = size;
6450         size_to_dma = SZ_2M;
6451
6452         while (size_left > 0) {
6453
6454                 if (size_left < SZ_2M)
6455                         size_to_dma = size_left;
6456
6457                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6458                                                 dma_addr);
6459                 if (rc)
6460                         break;
6461
6462                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6463
6464                 if (size_left <= SZ_2M)
6465                         break;
6466
6467                 pos += SZ_2M;
6468                 addr += SZ_2M;
6469                 size_left -= SZ_2M;
6470         }
6471
6472         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6473          * using the compute ctx ASID, if one exists. If not, use the kernel ctx
6474          * ASID
6475          */
6476         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6477                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6478
6479         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6480
6481 out:
6482         hdev->asic_funcs->hw_queues_unlock(hdev);
6483
6484         hdev->asic_funcs->set_clock_gating(hdev);
6485
6486         mutex_unlock(&gaudi->clk_gate_mutex);
6487
6488         hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6489                                                 dma_addr);
6490
6491         return rc;
6492 }
6493
6494 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6495 {
6496         struct gaudi_device *gaudi = hdev->asic_specific;
6497
6498         if (hdev->hard_reset_pending)
6499                 return U64_MAX;
6500
6501         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6502                         (addr - gaudi->hbm_bar_cur_addr));
6503 }
6504
6505 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6506 {
6507         struct gaudi_device *gaudi = hdev->asic_specific;
6508
6509         if (hdev->hard_reset_pending)
6510                 return;
6511
6512         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6513                         (addr - gaudi->hbm_bar_cur_addr));
6514 }
6515
6516 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6517 {
6518         /* mask to zero the MMBP and ASID bits */
6519         WREG32_AND(reg, ~0x7FF);
6520         WREG32_OR(reg, asid);
6521 }
6522
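     /*
      * Bind the given ASID to the device engines: each QMAN/engine carries
      * NON_SECURE_PROPS registers whose low bits select the ASID used for
      * its MMU-translated transactions, and gaudi_mmu_prepare_reg() also
      * clears the MMU-bypass bit while setting them.
      */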
6523 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6524 {
6525         struct gaudi_device *gaudi = hdev->asic_specific;
6526
6527         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6528                 return;
6529
6530         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6531                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6532                 return;
6533         }
6534
6535         mutex_lock(&gaudi->clk_gate_mutex);
6536
6537         hdev->asic_funcs->disable_clock_gating(hdev);
6538
6539         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6540         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6541         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6542         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6543         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6544
6545         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6546         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6547         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6548         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6549         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6550
6551         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6552         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6553         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6554         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6555         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6556
6557         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6558         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6559         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6560         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6561         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6562
6563         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6564         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6565         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6566         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6567         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6568
6569         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6570         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6571         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6572         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6573         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6574
6575         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6576         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6577         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6578         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6579         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6580
6581         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6582         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6583         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6584         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6585         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6586
6587         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6588         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6589         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6590         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6591         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6592         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6593         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6594         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6595
6596         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6597         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6598         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6599         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6600         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6601         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6602         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6603
6604         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6605         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6606         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6607         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6608         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6609         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6610         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6611
6612         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6613         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6614         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6615         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6616         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6617         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6618         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6619
6620         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6621         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6622         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6623         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6624         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6625         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6626         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6627
6628         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6629         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6630         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6631         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6632         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6633         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6634         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6635
6636         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6637         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6638         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6639         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6640         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6641         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6642         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6643
6644         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6645         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6646         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6647         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6648         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6649         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6650         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6651
6652         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6653         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6654         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6655         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6656         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6657         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6658         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6659
6660         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6661         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6662         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6663         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6664         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6665         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6666         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6667         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6668         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6669         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6670
6671         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6672         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6673         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6674         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6675         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6676         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6677         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6678         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6679         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6680         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6681         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6682         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6683
6684         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6685                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6686                                 asid);
6687                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6688                                 asid);
6689                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6690                                 asid);
6691                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6692                                 asid);
6693                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6694                                 asid);
6695         }
6696
6697         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6698                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6699                                 asid);
6700                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6701                                 asid);
6702                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6703                                 asid);
6704                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6705                                 asid);
6706                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6707                                 asid);
6708         }
6709
6710         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6711                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6712                                 asid);
6713                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6714                                 asid);
6715                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6716                                 asid);
6717                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6718                                 asid);
6719                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6720                                 asid);
6721         }
6722
6723         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6724                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6725                                 asid);
6726                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6727                                 asid);
6728                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6729                                 asid);
6730                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6731                                 asid);
6732                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6733                                 asid);
6734         }
6735
6736         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6737                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6738                                 asid);
6739                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6740                                 asid);
6741                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6742                                 asid);
6743                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6744                                 asid);
6745                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6746                                 asid);
6747         }
6748
6749         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6750                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6751                                 asid);
6752                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6753                                 asid);
6754                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6755                                 asid);
6756                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6757                                 asid);
6758                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6759                                 asid);
6760         }
6761
6762         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6763                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6764                                 asid);
6765                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6766                                 asid);
6767                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6768                                 asid);
6769                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6770                                 asid);
6771                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6772                                 asid);
6773         }
6774
6775         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6776                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6777                                 asid);
6778                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6779                                 asid);
6780                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6781                                 asid);
6782                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6783                                 asid);
6784                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6785                                 asid);
6786         }
6787
6788         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6789                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6790                                 asid);
6791                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6792                                 asid);
6793                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6794                                 asid);
6795                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6796                                 asid);
6797                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6798                                 asid);
6799         }
6800
6801         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6802                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6803                                 asid);
6804                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6805                                 asid);
6806                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6807                                 asid);
6808                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6809                                 asid);
6810                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6811                                 asid);
6812         }
6813
6814         hdev->asic_funcs->set_clock_gating(hdev);
6815
6816         mutex_unlock(&gaudi->clk_gate_mutex);
6817 }
6818
6819 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6820                 struct hl_cs_job *job)
6821 {
6822         struct packet_msg_prot *fence_pkt;
6823         u32 *fence_ptr;
6824         dma_addr_t fence_dma_addr;
6825         struct hl_cb *cb;
6826         u32 tmp, timeout, dma_offset;
6827         int rc;
6828
6829         if (hdev->pldm)
6830                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6831         else
6832                 timeout = HL_DEVICE_TIMEOUT_USEC;
6833
6834         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6835                 dev_err_ratelimited(hdev->dev,
6836                         "Can't send driver job on QMAN0 because the device is not idle\n");
6837                 return -EBUSY;
6838         }
6839
6840         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6841                                                         &fence_dma_addr);
6842         if (!fence_ptr) {
6843                 dev_err(hdev->dev,
6844                         "Failed to allocate fence memory for QMAN0\n");
6845                 return -ENOMEM;
6846         }
6847
6848         cb = job->patched_cb;
6849
6850         fence_pkt = cb->kernel_address +
6851                         job->job_cb_size - sizeof(struct packet_msg_prot);
6852
6853         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6854         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6855         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6856
6857         fence_pkt->ctl = cpu_to_le32(tmp);
6858         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6859         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6860
6861         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6862
6863         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6864
6865         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6866                                         job->job_cb_size, cb->bus_address);
6867         if (rc) {
6868                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6869                 goto free_fence_ptr;
6870         }
6871
6872         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6873                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6874                                 timeout, true);
6875
6876         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6877
6878         if (rc == -ETIMEDOUT) {
6879                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6880                 goto free_fence_ptr;
6881         }
6882
6883 free_fence_ptr:
6884         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6885                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6886
6887         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6888                                         fence_dma_addr);
6889         return rc;
6890 }
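
/*
 * How completion of the QMAN0 job above is detected: the driver appends a
 * MSG_PROT packet to the patched CB which makes QMAN0 write
 * GAUDI_QMAN0_FENCE_VAL into the small DMA-coherent fence buffer allocated
 * from the DMA pool, and then polls that buffer with
 * hl_poll_timeout_memory() until the value shows up or the timeout expires.
 */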
6891
6892 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6893 {
6894         if (event_type >= GAUDI_EVENT_SIZE)
6895                 goto event_not_supported;
6896
6897         if (!gaudi_irq_map_table[event_type].valid)
6898                 goto event_not_supported;
6899
6900         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6901
6902         return;
6903
6904 event_not_supported:
6905         snprintf(desc, size, "N/A");
6906 }
6907
6908 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6909                                                         u32 x_y, bool is_write)
6910 {
6911         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6912
6913         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6914                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6915
6916         switch (x_y) {
6917         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6918         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6919                 dma_id[0] = 0;
6920                 dma_id[1] = 2;
6921                 break;
6922         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6923         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6924                 dma_id[0] = 1;
6925                 dma_id[1] = 3;
6926                 break;
6927         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6928         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6929                 dma_id[0] = 4;
6930                 dma_id[1] = 6;
6931                 break;
6932         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6933         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6934                 dma_id[0] = 5;
6935                 dma_id[1] = 7;
6936                 break;
6937         default:
6938                 goto unknown_initiator;
6939         }
6940
6941         for (i = 0 ; i < 2 ; i++) {
6942                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6943                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6944         }
6945
6946         switch (x_y) {
6947         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6948         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6949                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6950                         return "DMA0";
6951                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6952                         return "DMA2";
6953                 else
6954                         return "DMA0 or DMA2";
6955         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6956         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6957                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6958                         return "DMA1";
6959                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6960                         return "DMA3";
6961                 else
6962                         return "DMA1 or DMA3";
6963         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6964         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6965                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6966                         return "DMA4";
6967                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6968                         return "DMA6";
6969                 else
6970                         return "DMA4 or DMA6";
6971         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6972         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6973                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6974                         return "DMA5";
6975                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6976                         return "DMA7";
6977                 else
6978                         return "DMA5 or DMA7";
6979         }
6980
6981 unknown_initiator:
6982         return "unknown initiator";
6983 }
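
/*
 * Example of the disambiguation above: the W_S DMA_IF serves both DMA0 and
 * DMA2, so on a RAZWI from that interface the ERR_CAUSE of both engines is
 * sampled. If only DMA0 has the relevant HBW read/write error bit set,
 * "DMA0" is returned; if neither (or both) have it, the ambiguous
 * "DMA0 or DMA2" string is returned instead.
 */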
6984
6985 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
6986                                                         bool is_write)
6987 {
6988         u32 val, x_y, axi_id;
6989
6990         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6991                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
6992         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6993                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6994         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6995                         RAZWI_INITIATOR_AXI_ID_SHIFT);
6996
6997         switch (x_y) {
6998         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6999                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7000                         return "TPC0";
7001                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7002                         return "NIC0";
7003                 break;
7004         case RAZWI_INITIATOR_ID_X_Y_TPC1:
7005                 return "TPC1";
7006         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
7007         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
7008                 return "MME0";
7009         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
7010         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
7011                 return "MME1";
7012         case RAZWI_INITIATOR_ID_X_Y_TPC2:
7013                 return "TPC2";
7014         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
7015                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7016                         return "TPC3";
7017                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
7018                         return "PCI";
7019                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
7020                         return "CPU";
7021                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
7022                         return "PSOC";
7023                 break;
7024         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7025         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7026         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7027         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7028         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7029         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7030         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7031         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7032                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
7033         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7034                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7035                         return "TPC4";
7036                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7037                         return "NIC1";
7038                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7039                         return "NIC2";
7040                 break;
7041         case RAZWI_INITIATOR_ID_X_Y_TPC5:
7042                 return "TPC5";
7043         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7044         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7045                 return "MME2";
7046         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7047         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7048                 return "MME3";
7049         case RAZWI_INITIATOR_ID_X_Y_TPC6:
7050                 return "TPC6";
7051         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7052                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7053                         return "TPC7";
7054                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7055                         return "NIC4";
7056                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7057                         return "NIC5";
7058                 break;
7059         default:
7060                 break;
7061         }
7062
7063         dev_err(hdev->dev,
7064                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7065                 val,
7066                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7067                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7068                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7069                         RAZWI_INITIATOR_AXI_ID_MASK);
7070
7071         return "unknown initiator";
7072 }
7073
7074 static void gaudi_print_razwi_info(struct hl_device *hdev)
7075 {
7076         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7077                 dev_err_ratelimited(hdev->dev,
7078                         "RAZWI event caused by illegal write of %s\n",
7079                         gaudi_get_razwi_initiator_name(hdev, true));
7080                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7081         }
7082
7083         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7084                 dev_err_ratelimited(hdev->dev,
7085                         "RAZWI event caused by illegal read of %s\n",
7086                         gaudi_get_razwi_initiator_name(hdev, false));
7087                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7088         }
7089 }
7090
7091 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
7092 {
7093         struct gaudi_device *gaudi = hdev->asic_specific;
7094         u64 addr;
7095         u32 val;
7096
7097         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7098                 return;
7099
7100         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7101         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7102                 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7103                 addr <<= 32;
7104                 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7105
7106                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
7107                                         addr);
7108
7109                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7110         }
7111
7112         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7113         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7114                 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7115                 addr <<= 32;
7116                 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7117
7118                 dev_err_ratelimited(hdev->dev,
7119                                 "MMU access error on va 0x%llx\n", addr);
7120
7121                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7122         }
7123 }
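
/*
 * A worked example of the VA reconstruction above (illustrative values
 * only, assuming the VA[49:32] field sits in the register's low bits as
 * the masking implies): if the capture register reports VA[49:32] = 0x1
 * and the corresponding VA register reads 0xDEAD0000, then
 *
 *   addr = ((u64)0x1 << 32) | 0xDEAD0000 = 0x1DEAD0000
 */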
7124
7125 /*
7126  *  +-------------------+------------------------------------------------------+
7127  *  | Configuration Reg |                     Description                      |
7128  *  |      Address      |                                                      |
7129  *  +-------------------+------------------------------------------------------+
7130  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
7131  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
7132  *  |                   |0xF34 memory wrappers 63:32                           |
7133  *  |                   |0xF38 memory wrappers 95:64                           |
7134  *  |                   |0xF3C memory wrappers 127:96                          |
7135  *  +-------------------+------------------------------------------------------+
7136  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
7137  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
7138  *  |                   |0xF44 memory wrappers 63:32                           |
7139  *  |                   |0xF48 memory wrappers 95:64                           |
7140  *  |                   |0xF4C memory wrappers 127:96                          |
7141  *  +-------------------+------------------------------------------------------+
7142  */
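/*
 * Worked example for the table above, matching the search loop in
 * gaudi_extract_ecc_info() below (illustrative values only): for a single
 * error (SERR) the words at 0xF30..0xF3C are scanned 4 bytes apart; if the
 * first non-zero word is the one at offset 0xF34 (wrappers 63:32) and only
 * bit 3 is set in it, then
 *
 *   memory_wrapper_idx = __ffs(err_word) + 32 * i = 3 + 32 * 1 = 35
 */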
7143 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7144                 struct ecc_info_extract_params *params, u64 *ecc_address,
7145                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7146 {
7147         struct gaudi_device *gaudi = hdev->asic_specific;
7148         u32 i, num_mem_regs, reg, err_bit;
7149         u64 err_addr, err_word = 0;
7150         int rc = 0;
7151
7152         num_mem_regs = params->num_memories / 32 +
7153                         ((params->num_memories % 32) ? 1 : 0);
7154
7155         if (params->block_address >= CFG_BASE)
7156                 params->block_address -= CFG_BASE;
7157
7158         if (params->derr)
7159                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7160         else
7161                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7162
7163         if (params->disable_clock_gating) {
7164                 mutex_lock(&gaudi->clk_gate_mutex);
7165                 hdev->asic_funcs->disable_clock_gating(hdev);
7166         }
7167
7168         /* Set invalid wrapper index */
7169         *memory_wrapper_idx = 0xFF;
7170
7171         /* Iterate through memory wrappers, a single bit must be set */
7172         for (i = 0 ; i < num_mem_regs ; i++) {
7173                 /* the indication registers are consecutive, 4 bytes apart */
7174                 err_word = RREG32(err_addr + i * 4);
7175                 if (err_word) {
7176                         err_bit = __ffs(err_word);
7177                         *memory_wrapper_idx = err_bit + (32 * i);
7178                         break;
7179                 }
7180         }
7181
7182         if (*memory_wrapper_idx == 0xFF) {
7183                 dev_err(hdev->dev, "ECC error information cannot be found\n");
7184                 rc = -EINVAL;
7185                 goto enable_clk_gate;
7186         }
7187
7188         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7189                         *memory_wrapper_idx);
7190
7191         *ecc_address =
7192                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7193         *ecc_syndrom =
7194                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7195
7196         /* Clear error indication */
7197         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7198         if (params->derr)
7199                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7200         else
7201                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7202
7203         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7204
7205 enable_clk_gate:
7206         if (params->disable_clock_gating) {
7207                 hdev->asic_funcs->set_clock_gating(hdev);
7208
7209                 mutex_unlock(&gaudi->clk_gate_mutex);
7210         }
7211
7212         return rc;
7213 }
7214
7215 /*
7216  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7217  *
7218  * @idx: the current pi/ci value
7219  * @q_len: the queue length (power of 2)
7220  *
7221  * @return the cyclically decremented index
7222  */
7223 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7224 {
7225         u32 mask = q_len - 1;
7226
7227         /*
7228          * modular decrement is equivalent to adding (q_len - 1);
7229          * taking the LSBs afterwards keeps the value in the
7230          * range [0, q_len - 1]
7231          */
7232         return (idx + q_len - 1) & mask;
7233 }
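
/*
 * Quick sanity check of the wrap-around above, with q_len = 8 (mask = 7):
 *
 *   gaudi_queue_idx_dec(5, 8) == (5 + 7) & 7 == 4
 *   gaudi_queue_idx_dec(0, 8) == (0 + 7) & 7 == 7   (wraps to the last entry)
 */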
7234
7235 /**
7236  * gaudi_print_sw_config_stream_data - print SW config stream data
7237  *
7238  * @hdev: pointer to the habanalabs device structure
7239  * @stream: the QMAN's stream
7240  * @qman_base: base address of QMAN registers block
7241  */
7242 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7243                                                 u64 qman_base)
7244 {
7245         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7246         u32 cq_ptr_lo_off, size;
7247
7248         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7249
7250         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7251                                                 stream * cq_ptr_lo_off;
7252         cq_ptr_hi = cq_ptr_lo +
7253                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7254         cq_tsize = cq_ptr_lo +
7255                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7256
7257         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7258         size = RREG32(cq_tsize);
7259         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
7260                                                         stream, cq_ptr, size);
7261 }
7262
7263 /**
7264  * gaudi_print_last_pqes_on_err - print last PQEs on error
7265  *
7266  * @hdev: pointer to the habanalabs device structure
7267  * @qid_base: first QID of the QMAN (out of 4 streams)
7268  * @stream: the QMAN's stream
7269  * @qman_base: base address of QMAN registers block
7270  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7271  */
7272 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7273                                                 u32 stream, u64 qman_base,
7274                                                 bool pr_sw_conf)
7275 {
7276         u32 ci, qm_ci_stream_off, queue_len;
7277         struct hl_hw_queue *q;
7278         u64 pq_ci;
7279         int i;
7280
7281         q = &hdev->kernel_queues[qid_base + stream];
7282
7283         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7284         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7285                                                 stream * qm_ci_stream_off;
7286
7287         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7288                                         q->int_queue_len : HL_QUEUE_LENGTH;
7289
7290         hdev->asic_funcs->hw_queues_lock(hdev);
7291
7292         if (pr_sw_conf)
7293                 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7294
7295         ci = RREG32(pq_ci);
7296
7297         /* we should start printing from ci - 1 */
7298         ci = gaudi_queue_idx_dec(ci, queue_len);
7299
7300         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7301                 struct hl_bd *bd;
7302                 u64 addr;
7303                 u32 len;
7304
7305                 bd = q->kernel_address;
7306                 bd += ci;
7307
7308                 len = le32_to_cpu(bd->len);
7309                 /* len 0 means an uninitialized entry - break */
7310                 if (!len)
7311                         break;
7312
7313                 addr = le64_to_cpu(bd->ptr);
7314
7315                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7316                                                         stream, ci, addr, len);
7317
7318                 /* get previous ci, wrap if needed */
7319                 ci = gaudi_queue_idx_dec(ci, queue_len);
7320         }
7321
7322         hdev->asic_funcs->hw_queues_unlock(hdev);
7323 }
7324
7325 /**
7326  * print_qman_data_on_err - extract QMAN data on error
7327  *
7328  * @hdev: pointer to the habanalabs device structure
7329  * @qid_base: first QID of the QMAN (out of 4 streams)
7330  * @stream: the QMAN's stream
7331  * @qman_base: base address of QMAN registers block
7332  *
7333  * This function attempts to extract as much data as possible on a QMAN error.
7334  * For an upper CP, print the SW config stream data and the last 8 PQEs.
7335  * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7336  */
7337 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7338                                                 u32 stream, u64 qman_base)
7339 {
7340         u32 i;
7341
7342         if (stream != QMAN_STREAMS) {
7343                 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7344                                                                         true);
7345                 return;
7346         }
7347
7348         gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7349
7350         for (i = 0; i < QMAN_STREAMS; i++)
7351                 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7352                                                                         false);
7353 }
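
/*
 * Note on the stream argument above: gaudi_handle_qman_err_generic() below
 * iterates i over [0, QMAN_STREAMS], where i == QMAN_STREAMS denotes the
 * lower CP ("LowerCP"). Passing that value here is what selects the
 * "dump all four upper-CP streams" path instead of a single-stream dump.
 */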
7354
7355 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7356                                           const char *qm_name,
7357                                           u64 qman_base,
7358                                           u32 qid_base)
7359 {
7360         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7361         u64 glbl_sts_addr, arb_err_addr;
7362         char reg_desc[32];
7363
7364         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7365         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7366
7367         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7368         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7369                 glbl_sts_clr_val = 0;
7370                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7371
7372                 if (!glbl_sts_val)
7373                         continue;
7374
7375                 if (i == QMAN_STREAMS)
7376                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7377                 else
7378                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7379
7380                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7381                         if (glbl_sts_val & BIT(j)) {
7382                                 dev_err_ratelimited(hdev->dev,
7383                                                 "%s %s. err cause: %s\n",
7384                                                 qm_name, reg_desc,
7385                                                 gaudi_qman_error_cause[j]);
7386                                 glbl_sts_clr_val |= BIT(j);
7387                         }
7388                 }
7389
7390                 /* Write 1 to clear errors */
7391                 if (!hdev->stop_on_err)
7392                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7393                 else
7394                         print_qman_data_on_err(hdev, qid_base, i, qman_base);
7395         }
7396
7397         arb_err_val = RREG32(arb_err_addr);
7398
7399         if (!arb_err_val)
7400                 return;
7401
7402         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7403                 if (arb_err_val & BIT(j)) {
7404                         dev_err_ratelimited(hdev->dev,
7405                                         "%s ARB_ERR. err cause: %s\n",
7406                                         qm_name,
7407                                         gaudi_qman_arb_error_cause[j]);
7408                 }
7409         }
7410 }
7411
7412 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7413                 struct hl_eq_sm_sei_data *sei_data)
7414 {
7415         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7416
7417         switch (sei_data->sei_cause) {
7418         case SM_SEI_SO_OVERFLOW:
7419                 dev_err(hdev->dev,
7420                         "SM %u SEI Error: SO %u overflow/underflow",
7421                         index, le32_to_cpu(sei_data->sei_log));
7422                 break;
7423         case SM_SEI_LBW_4B_UNALIGNED:
7424                 dev_err(hdev->dev,
7425                         "SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7426                         index, le32_to_cpu(sei_data->sei_log));
7427                 break;
7428         case SM_SEI_AXI_RESPONSE_ERR:
7429                 dev_err(hdev->dev,
7430                         "SM %u SEI Error: AXI ID %u response error",
7431                         index, le32_to_cpu(sei_data->sei_log));
7432                 break;
7433         default:
7434                 dev_err(hdev->dev, "Unknown SM SEI cause %u",
7435                                 le32_to_cpu(sei_data->sei_log));
7436                 break;
7437         }
7438 }
7439
7440 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7441                 struct hl_eq_ecc_data *ecc_data)
7442 {
7443         struct ecc_info_extract_params params;
7444         u64 ecc_address = 0, ecc_syndrom = 0;
7445         u8 index, memory_wrapper_idx = 0;
7446         bool extract_info_from_fw;
7447         int rc;
7448
7449         switch (event_type) {
7450         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7451         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7452                 extract_info_from_fw = true;
7453                 break;
7454         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7455                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7456                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7457                 params.num_memories = 90;
7458                 params.derr = false;
7459                 params.disable_clock_gating = true;
7460                 extract_info_from_fw = false;
7461                 break;
7462         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7463                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7464                 params.block_address =
7465                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7466                 params.num_memories = 90;
7467                 params.derr = true;
7468                 params.disable_clock_gating = true;
7469                 extract_info_from_fw = false;
7470                 break;
7471         case GAUDI_EVENT_MME0_ACC_SERR:
7472         case GAUDI_EVENT_MME1_ACC_SERR:
7473         case GAUDI_EVENT_MME2_ACC_SERR:
7474         case GAUDI_EVENT_MME3_ACC_SERR:
7475                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7476                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7477                 params.num_memories = 128;
7478                 params.derr = false;
7479                 params.disable_clock_gating = true;
7480                 extract_info_from_fw = false;
7481                 break;
7482         case GAUDI_EVENT_MME0_ACC_DERR:
7483         case GAUDI_EVENT_MME1_ACC_DERR:
7484         case GAUDI_EVENT_MME2_ACC_DERR:
7485         case GAUDI_EVENT_MME3_ACC_DERR:
7486                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7487                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7488                 params.num_memories = 128;
7489                 params.derr = true;
7490                 params.disable_clock_gating = true;
7491                 extract_info_from_fw = false;
7492                 break;
7493         case GAUDI_EVENT_MME0_SBAB_SERR:
7494         case GAUDI_EVENT_MME1_SBAB_SERR:
7495         case GAUDI_EVENT_MME2_SBAB_SERR:
7496         case GAUDI_EVENT_MME3_SBAB_SERR:
7497                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7498                 params.block_address =
7499                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7500                 params.num_memories = 33;
7501                 params.derr = false;
7502                 params.disable_clock_gating = true;
7503                 extract_info_from_fw = false;
7504                 break;
7505         case GAUDI_EVENT_MME0_SBAB_DERR:
7506         case GAUDI_EVENT_MME1_SBAB_DERR:
7507         case GAUDI_EVENT_MME2_SBAB_DERR:
7508         case GAUDI_EVENT_MME3_SBAB_DERR:
7509                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7510                 params.block_address =
7511                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7512                 params.num_memories = 33;
7513                 params.derr = true;
7514                 params.disable_clock_gating = true;
7515                 extract_info_from_fw = false;
7516                 break;
7517         default:
7518                 return;
7519         }
7520
7521         if (extract_info_from_fw) {
7522                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7523                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7524                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7525         } else {
7526                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7527                                 &ecc_syndrom, &memory_wrapper_idx);
7528                 if (rc)
7529                         return;
7530         }
7531
7532         dev_err(hdev->dev,
7533                 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7534                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7535 }
7536
7537 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7538 {
7539         u64 qman_base;
7540         char desc[32];
7541         u32 qid_base;
7542         u8 index;
7543
7544         switch (event_type) {
7545         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7546                 index = event_type - GAUDI_EVENT_TPC0_QM;
7547                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7548                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7549                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7550                 break;
7551         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7552                 index = event_type - GAUDI_EVENT_MME0_QM;
7553                 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7554                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7555                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7556                 break;
7557         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7558                 index = event_type - GAUDI_EVENT_DMA0_QM;
7559                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7560                 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7561                 if (index > 1)
7562                         qid_base++;
7563                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7564                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7565                 break;
7566         case GAUDI_EVENT_NIC0_QM0:
7567                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7568                 qman_base = mmNIC0_QM0_BASE;
7569                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7570                 break;
7571         case GAUDI_EVENT_NIC0_QM1:
7572                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7573                 qman_base = mmNIC0_QM1_BASE;
7574                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7575                 break;
7576         case GAUDI_EVENT_NIC1_QM0:
7577                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7578                 qman_base = mmNIC1_QM0_BASE;
7579                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7580                 break;
7581         case GAUDI_EVENT_NIC1_QM1:
7582                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7583                 qman_base = mmNIC1_QM1_BASE;
7584                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7585                 break;
7586         case GAUDI_EVENT_NIC2_QM0:
7587                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7588                 qman_base = mmNIC2_QM0_BASE;
7589                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7590                 break;
7591         case GAUDI_EVENT_NIC2_QM1:
7592                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7593                 qman_base = mmNIC2_QM1_BASE;
7594                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7595                 break;
7596         case GAUDI_EVENT_NIC3_QM0:
7597                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7598                 qman_base = mmNIC3_QM0_BASE;
7599                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7600                 break;
7601         case GAUDI_EVENT_NIC3_QM1:
7602                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7603                 qman_base = mmNIC3_QM1_BASE;
7604                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7605                 break;
7606         case GAUDI_EVENT_NIC4_QM0:
7607                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7608                 qman_base = mmNIC4_QM0_BASE;
7609                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7610                 break;
7611         case GAUDI_EVENT_NIC4_QM1:
7612                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7613                 qman_base = mmNIC4_QM1_BASE;
7614                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7615                 break;
7616         default:
7617                 return;
7618         }
7619
7620         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7621 }
7622
7623 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7624                                         bool razwi)
7625 {
7626         char desc[64] = "";
7627
7628         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7629         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7630                 event_type, desc);
7631
7632         if (razwi) {
7633                 gaudi_print_razwi_info(hdev);
7634                 gaudi_print_mmu_error_info(hdev);
7635         }
7636 }
7637
7638 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7639                                         struct cpucp_pkt_sync_err *sync_err)
7640 {
7641         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7642
7643         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7644                         sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7645 }
7646
7647 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7648                                         struct hl_eq_fw_alive *fw_alive)
7649 {
7650         dev_err(hdev->dev,
7651                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7652                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7653                 "Minor" : "Critical", fw_alive->process_id,
7654                 fw_alive->thread_id, fw_alive->uptime_seconds);
7655 }
7656
7657 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7658 {
7659         struct gaudi_device *gaudi = hdev->asic_specific;
7660
7661         /* Unmask all IRQs since some could have been received
7662          * during the soft reset
7663          */
7664         return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7665 }
7666
7667 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7668                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7669 {
7670         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7671         int rc = 0;
7672
7673         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7674                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7675                 if (!hbm_ecc_data) {
7676                         dev_err(hdev->dev, "No FW ECC data");
7677                         return 0;
7678                 }
7679
7680                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7681                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7682                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7683                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7684                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7685                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7686                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7687                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7688                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7689                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7690                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7691                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7692                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7693                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7694
7695                 dev_err(hdev->dev,
7696                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7697                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7698                 dev_err(hdev->dev,
7699                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7700                         device, ch, hbm_ecc_data->first_addr, type,
7701                         hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7702                         hbm_ecc_data->dec_cnt);
7703                 return 0;
7704         }
7705
7706         if (hdev->asic_prop.fw_security_enabled) {
7707                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7708                 return 0;
7709         }
7710
7711         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7712         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7713                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7714                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7715                 if (val) {
7716                         rc = -EIO;
7717                         dev_err(hdev->dev,
7718                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7719                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7720                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7721                                 (val >> 4) & 0x1);
7722
7723                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7724                         dev_err(hdev->dev,
7725                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7726                                 device, ch * 2,
7727                                 RREG32(base + ch * 0x1000 + 0x064),
7728                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7729                                 (val2 & 0xFF0000) >> 16,
7730                                 (val2 & 0xFF000000) >> 24);
7731                 }
7732
7733                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7734                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7735                 if (val) {
7736                         rc = -EIO;
7737                         dev_err(hdev->dev,
7738                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7739                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7740                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7741                                 (val >> 4) & 0x1);
7742
7743                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7744                         dev_err(hdev->dev,
7745                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7746                                 device, ch * 2 + 1,
7747                                 RREG32(base + ch * 0x1000 + 0x074),
7748                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7749                                 (val2 & 0xFF0000) >> 16,
7750                                 (val2 & 0xFF000000) >> 24);
7751                 }
7752
7753                 /* Clear interrupts */
7754                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7755                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7756                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7757                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7758                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7759                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7760         }
7761
7762         val  = RREG32(base + 0x8F30);
7763         val2 = RREG32(base + 0x8F34);
7764         if (val | val2) {
7765                 rc = -EIO;
7766                 dev_err(hdev->dev,
7767                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7768                         device, val, val2);
7769         }
7770         val  = RREG32(base + 0x8F40);
7771         val2 = RREG32(base + 0x8F44);
7772         if (val | val2) {
7773                 rc = -EIO;
7774                 dev_err(hdev->dev,
7775                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7776                         device, val, val2);
7777         }
7778
7779         return rc;
7780 }
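
/*
 * Illustrative decode of the per-channel status words read above (a sketch
 * derived from the shifts and masks in gaudi_hbm_read_interrupts, not an
 * authoritative register description): in the interrupt word at offset
 * 0x06C/0x07C the low and high bytes are OR-ed together and bits [4:0] of
 * the result map to WR_PAR, RD_PAR, CA_PAR, SERR and DERR respectively.
 * In the ECC word at offset 0x060/0x070:
 *   bit  9        - 1ST_ERR_TYPE
 *   bits [15:10]  - SEC_CONT_CNT
 *   bits [23:16]  - SEC_CNT
 *   bits [31:24]  - DEC_CNT
 */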
7781
7782 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7783 {
7784         switch (hbm_event_type) {
7785         case GAUDI_EVENT_HBM0_SPI_0:
7786         case GAUDI_EVENT_HBM0_SPI_1:
7787                 return 0;
7788         case GAUDI_EVENT_HBM1_SPI_0:
7789         case GAUDI_EVENT_HBM1_SPI_1:
7790                 return 1;
7791         case GAUDI_EVENT_HBM2_SPI_0:
7792         case GAUDI_EVENT_HBM2_SPI_1:
7793                 return 2;
7794         case GAUDI_EVENT_HBM3_SPI_0:
7795         case GAUDI_EVENT_HBM3_SPI_1:
7796                 return 3;
7797         default:
7798                 break;
7799         }
7800
7801         /* Should never happen */
7802         return 0;
7803 }
7804
7805 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7806                                         char *interrupt_name)
7807 {
7808         struct gaudi_device *gaudi = hdev->asic_specific;
7809         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7810         bool soft_reset_required = false;
7811
7812         /* Accessing the TPC_INTR_CAUSE registers requires disabling clock
7813          * gating, which cannot be done by CPU-CP, so the driver must do it
7814          * instead.
7815          */
7816
7817         mutex_lock(&gaudi->clk_gate_mutex);
7818
7819         hdev->asic_funcs->disable_clock_gating(hdev);
7820
7821         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7822                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7823
7824         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7825                 if (tpc_interrupts_cause & BIT(i)) {
7826                         dev_err_ratelimited(hdev->dev,
7827                                         "TPC%d_%s interrupt cause: %s\n",
7828                                         tpc_id, interrupt_name,
7829                                         gaudi_tpc_interrupts_cause[i]);
7830                         /* If this is a QM error, we need to soft-reset */
7831                         if (i == 15)
7832                                 soft_reset_required = true;
7833                 }
7834
7835         /* Clear interrupts */
7836         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7837
7838         hdev->asic_funcs->set_clock_gating(hdev);
7839
7840         mutex_unlock(&gaudi->clk_gate_mutex);
7841
7842         return soft_reset_required;
7843 }
7844
7845 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7846 {
7847         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7848 }
7849
7850 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7851 {
7852         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7853 }
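
/*
 * The divisors above presumably reflect the spacing of the per-TPC event IDs
 * in the async events map: *_DEC events every 2 IDs, *_KRN_ERR events every
 * 6 IDs. An illustrative (assumed) example:
 *   tpc_dec_event_to_tpc_id(GAUDI_EVENT_TPC3_DEC)
 *     = (GAUDI_EVENT_TPC0_DEC + 3 * 2 - GAUDI_EVENT_TPC0_DEC) >> 1 = 3
 */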
7854
7855 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7856                                         u16 event_type)
7857 {
7858         switch (event_type) {
7859         case GAUDI_EVENT_FIX_POWER_ENV_S:
7860                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7861                 dev_info_ratelimited(hdev->dev,
7862                         "Clock throttling due to power consumption\n");
7863                 break;
7864
7865         case GAUDI_EVENT_FIX_POWER_ENV_E:
7866                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7867                 dev_info_ratelimited(hdev->dev,
7868                         "Power envelope is safe, back to optimal clock\n");
7869                 break;
7870
7871         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7872                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7873                 dev_info_ratelimited(hdev->dev,
7874                         "Clock throttling due to overheating\n");
7875                 break;
7876
7877         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7878                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7879                 dev_info_ratelimited(hdev->dev,
7880                         "Thermal envelope is safe, back to optimal clock\n");
7881                 break;
7882
7883         default:
7884                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7885                         event_type);
7886                 break;
7887         }
7888 }
7889
7890 static void gaudi_handle_eqe(struct hl_device *hdev,
7891                                 struct hl_eq_entry *eq_entry)
7892 {
7893         struct gaudi_device *gaudi = hdev->asic_specific;
7894         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7895         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7896                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7897         u8 cause;
7898         bool reset_required;
7899
7900         gaudi->events_stat[event_type]++;
7901         gaudi->events_stat_aggregate[event_type]++;
7902
7903         switch (event_type) {
7904         case GAUDI_EVENT_PCIE_CORE_DERR:
7905         case GAUDI_EVENT_PCIE_IF_DERR:
7906         case GAUDI_EVENT_PCIE_PHY_DERR:
7907         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7908         case GAUDI_EVENT_MME0_ACC_DERR:
7909         case GAUDI_EVENT_MME0_SBAB_DERR:
7910         case GAUDI_EVENT_MME1_ACC_DERR:
7911         case GAUDI_EVENT_MME1_SBAB_DERR:
7912         case GAUDI_EVENT_MME2_ACC_DERR:
7913         case GAUDI_EVENT_MME2_SBAB_DERR:
7914         case GAUDI_EVENT_MME3_ACC_DERR:
7915         case GAUDI_EVENT_MME3_SBAB_DERR:
7916         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7917                 fallthrough;
7918         case GAUDI_EVENT_CPU_IF_ECC_DERR:
7919         case GAUDI_EVENT_PSOC_MEM_DERR:
7920         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7921         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7922         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7923         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7924         case GAUDI_EVENT_MMU_DERR:
7925         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7926                 gaudi_print_irq_info(hdev, event_type, true);
7927                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7928                 goto reset_device;
7929
7930         case GAUDI_EVENT_GIC500:
7931         case GAUDI_EVENT_AXI_ECC:
7932         case GAUDI_EVENT_L2_RAM_ECC:
7933         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7934                 gaudi_print_irq_info(hdev, event_type, false);
7935                 goto reset_device;
7936
7937         case GAUDI_EVENT_HBM0_SPI_0:
7938         case GAUDI_EVENT_HBM1_SPI_0:
7939         case GAUDI_EVENT_HBM2_SPI_0:
7940         case GAUDI_EVENT_HBM3_SPI_0:
7941                 gaudi_print_irq_info(hdev, event_type, false);
7942                 gaudi_hbm_read_interrupts(hdev,
7943                                 gaudi_hbm_event_to_dev(event_type),
7944                                 &eq_entry->hbm_ecc_data);
7945                 goto reset_device;
7946
7947         case GAUDI_EVENT_HBM0_SPI_1:
7948         case GAUDI_EVENT_HBM1_SPI_1:
7949         case GAUDI_EVENT_HBM2_SPI_1:
7950         case GAUDI_EVENT_HBM3_SPI_1:
7951                 gaudi_print_irq_info(hdev, event_type, false);
7952                 gaudi_hbm_read_interrupts(hdev,
7953                                 gaudi_hbm_event_to_dev(event_type),
7954                                 &eq_entry->hbm_ecc_data);
7955                 hl_fw_unmask_irq(hdev, event_type);
7956                 break;
7957
7958         case GAUDI_EVENT_TPC0_DEC:
7959         case GAUDI_EVENT_TPC1_DEC:
7960         case GAUDI_EVENT_TPC2_DEC:
7961         case GAUDI_EVENT_TPC3_DEC:
7962         case GAUDI_EVENT_TPC4_DEC:
7963         case GAUDI_EVENT_TPC5_DEC:
7964         case GAUDI_EVENT_TPC6_DEC:
7965         case GAUDI_EVENT_TPC7_DEC:
7966                 gaudi_print_irq_info(hdev, event_type, true);
7967                 reset_required = gaudi_tpc_read_interrupts(hdev,
7968                                         tpc_dec_event_to_tpc_id(event_type),
7969                                         "AXI_SLV_DEC_Error");
7970                 if (reset_required) {
7971                         dev_err(hdev->dev, "hard reset required due to %s\n",
7972                                 gaudi_irq_map_table[event_type].name);
7973
7974                         goto reset_device;
7975                 } else {
7976                         hl_fw_unmask_irq(hdev, event_type);
7977                 }
7978                 break;
7979
7980         case GAUDI_EVENT_TPC0_KRN_ERR:
7981         case GAUDI_EVENT_TPC1_KRN_ERR:
7982         case GAUDI_EVENT_TPC2_KRN_ERR:
7983         case GAUDI_EVENT_TPC3_KRN_ERR:
7984         case GAUDI_EVENT_TPC4_KRN_ERR:
7985         case GAUDI_EVENT_TPC5_KRN_ERR:
7986         case GAUDI_EVENT_TPC6_KRN_ERR:
7987         case GAUDI_EVENT_TPC7_KRN_ERR:
7988                 gaudi_print_irq_info(hdev, event_type, true);
7989                 reset_required = gaudi_tpc_read_interrupts(hdev,
7990                                         tpc_krn_event_to_tpc_id(event_type),
7991                                         "KRN_ERR");
7992                 if (reset_required) {
7993                         dev_err(hdev->dev, "hard reset required due to %s\n",
7994                                 gaudi_irq_map_table[event_type].name);
7995
7996                         goto reset_device;
7997                 } else {
7998                         hl_fw_unmask_irq(hdev, event_type);
7999                 }
8000                 break;
8001
8002         case GAUDI_EVENT_PCIE_CORE_SERR:
8003         case GAUDI_EVENT_PCIE_IF_SERR:
8004         case GAUDI_EVENT_PCIE_PHY_SERR:
8005         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8006         case GAUDI_EVENT_MME0_ACC_SERR:
8007         case GAUDI_EVENT_MME0_SBAB_SERR:
8008         case GAUDI_EVENT_MME1_ACC_SERR:
8009         case GAUDI_EVENT_MME1_SBAB_SERR:
8010         case GAUDI_EVENT_MME2_ACC_SERR:
8011         case GAUDI_EVENT_MME2_SBAB_SERR:
8012         case GAUDI_EVENT_MME3_ACC_SERR:
8013         case GAUDI_EVENT_MME3_SBAB_SERR:
8014         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8015         case GAUDI_EVENT_CPU_IF_ECC_SERR:
8016         case GAUDI_EVENT_PSOC_MEM_SERR:
8017         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8018         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8019         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8020         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8021                 fallthrough;
8022         case GAUDI_EVENT_MMU_SERR:
8023                 gaudi_print_irq_info(hdev, event_type, true);
8024                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8025                 hl_fw_unmask_irq(hdev, event_type);
8026                 break;
8027
8028         case GAUDI_EVENT_PCIE_DEC:
8029         case GAUDI_EVENT_MME0_WBC_RSP:
8030         case GAUDI_EVENT_MME0_SBAB0_RSP:
8031         case GAUDI_EVENT_MME1_WBC_RSP:
8032         case GAUDI_EVENT_MME1_SBAB0_RSP:
8033         case GAUDI_EVENT_MME2_WBC_RSP:
8034         case GAUDI_EVENT_MME2_SBAB0_RSP:
8035         case GAUDI_EVENT_MME3_WBC_RSP:
8036         case GAUDI_EVENT_MME3_SBAB0_RSP:
8037         case GAUDI_EVENT_CPU_AXI_SPLITTER:
8038         case GAUDI_EVENT_PSOC_AXI_DEC:
8039         case GAUDI_EVENT_PSOC_PRSTN_FALL:
8040         case GAUDI_EVENT_MMU_PAGE_FAULT:
8041         case GAUDI_EVENT_MMU_WR_PERM:
8042         case GAUDI_EVENT_RAZWI_OR_ADC:
8043         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8044         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8045         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8046                 fallthrough;
8047         case GAUDI_EVENT_NIC0_QM0:
8048         case GAUDI_EVENT_NIC0_QM1:
8049         case GAUDI_EVENT_NIC1_QM0:
8050         case GAUDI_EVENT_NIC1_QM1:
8051         case GAUDI_EVENT_NIC2_QM0:
8052         case GAUDI_EVENT_NIC2_QM1:
8053         case GAUDI_EVENT_NIC3_QM0:
8054         case GAUDI_EVENT_NIC3_QM1:
8055         case GAUDI_EVENT_NIC4_QM0:
8056         case GAUDI_EVENT_NIC4_QM1:
8057         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8058                 gaudi_print_irq_info(hdev, event_type, true);
8059                 gaudi_handle_qman_err(hdev, event_type);
8060                 hl_fw_unmask_irq(hdev, event_type);
8061                 break;
8062
8063         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8064                 gaudi_print_irq_info(hdev, event_type, true);
8065                 goto reset_device;
8066
8067         case GAUDI_EVENT_TPC0_BMON_SPMU:
8068         case GAUDI_EVENT_TPC1_BMON_SPMU:
8069         case GAUDI_EVENT_TPC2_BMON_SPMU:
8070         case GAUDI_EVENT_TPC3_BMON_SPMU:
8071         case GAUDI_EVENT_TPC4_BMON_SPMU:
8072         case GAUDI_EVENT_TPC5_BMON_SPMU:
8073         case GAUDI_EVENT_TPC6_BMON_SPMU:
8074         case GAUDI_EVENT_TPC7_BMON_SPMU:
8075         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8076                 gaudi_print_irq_info(hdev, event_type, false);
8077                 hl_fw_unmask_irq(hdev, event_type);
8078                 break;
8079
8080         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8081                 gaudi_print_irq_info(hdev, event_type, false);
8082                 gaudi_print_sm_sei_info(hdev, event_type,
8083                                         &eq_entry->sm_sei_data);
8084                 hl_fw_unmask_irq(hdev, event_type);
8085                 break;
8086
8087         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8088                 gaudi_print_clk_change_info(hdev, event_type);
8089                 hl_fw_unmask_irq(hdev, event_type);
8090                 break;
8091
8092         case GAUDI_EVENT_PSOC_GPIO_U16_0:
8093                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8094                 dev_err(hdev->dev,
8095                         "Received high temp H/W interrupt %d (cause %d)\n",
8096                         event_type, cause);
8097                 break;
8098
8099         case GAUDI_EVENT_DEV_RESET_REQ:
8100                 gaudi_print_irq_info(hdev, event_type, false);
8101                 goto reset_device;
8102
8103         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8104                 gaudi_print_irq_info(hdev, event_type, false);
8105                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8106                 goto reset_device;
8107
8108         case GAUDI_EVENT_FW_ALIVE_S:
8109                 gaudi_print_irq_info(hdev, event_type, false);
8110                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8111                 goto reset_device;
8112
8113         default:
8114                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8115                                 event_type);
8116                 break;
8117         }
8118
8119         return;
8120
8121 reset_device:
8122         if (hdev->hard_reset_on_fw_events)
8123                 hl_device_reset(hdev, HL_RESET_HARD);
8124         else
8125                 hl_fw_unmask_irq(hdev, event_type);
8126 }
8127
8128 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8129                                         u32 *size)
8130 {
8131         struct gaudi_device *gaudi = hdev->asic_specific;
8132
8133         if (aggregate) {
8134                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8135                 return gaudi->events_stat_aggregate;
8136         }
8137
8138         *size = (u32) sizeof(gaudi->events_stat);
8139         return gaudi->events_stat;
8140 }
8141
8142 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8143                                         u32 flags)
8144 {
8145         struct gaudi_device *gaudi = hdev->asic_specific;
8146         u32 status, timeout_usec;
8147         int rc;
8148
8149         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8150                 hdev->hard_reset_pending)
8151                 return 0;
8152
8153         if (hdev->pldm)
8154                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8155         else
8156                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8157
8158         /* L0 & L1 invalidation */
8159         WREG32(mmSTLB_INV_PS, 3);
8160         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8161         WREG32(mmSTLB_INV_PS, 2);
8162
8163         rc = hl_poll_timeout(
8164                 hdev,
8165                 mmSTLB_INV_PS,
8166                 status,
8167                 !status,
8168                 1000,
8169                 timeout_usec);
8170
8171         WREG32(mmSTLB_INV_SET, 0);
8172
8173         if (rc) {
8174                 dev_err_ratelimited(hdev->dev,
8175                                         "MMU cache invalidation timeout\n");
8176                 hl_device_reset(hdev, HL_RESET_HARD);
8177         }
8178
8179         return rc;
8180 }
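
/*
 * Rough sketch of the invalidation handshake coded above (a reading of the
 * register accesses, not an authoritative description): the driver writes 3
 * and then 2 to STLB_INV_PS around bumping the running producer index in
 * STLB_CACHE_INV, polls STLB_INV_PS until it reads 0, then clears
 * STLB_INV_SET. A timeout escalates to a hard reset.
 */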
8181
8182 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8183                                                 bool is_hard, u32 flags,
8184                                                 u32 asid, u64 va, u64 size)
8185 {
8186         /* Treat as invalidate all because there is no range invalidation
8187          * in Gaudi
8188          */
8189         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8190 }
8191
8192 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8193                                         u32 asid, u64 phys_addr)
8194 {
8195         u32 status, timeout_usec;
8196         int rc;
8197
8198         if (hdev->pldm)
8199                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8200         else
8201                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8202
8203         WREG32(MMU_ASID, asid);
8204         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8205         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8206         WREG32(MMU_BUSY, 0x80000000);
8207
8208         rc = hl_poll_timeout(
8209                 hdev,
8210                 MMU_BUSY,
8211                 status,
8212                 !(status & 0x80000000),
8213                 1000,
8214                 timeout_usec);
8215
8216         if (rc) {
8217                 dev_err(hdev->dev,
8218                         "Timeout during MMU hop0 config of asid %d\n", asid);
8219                 return rc;
8220         }
8221
8222         return 0;
8223 }
8224
8225 static int gaudi_send_heartbeat(struct hl_device *hdev)
8226 {
8227         struct gaudi_device *gaudi = hdev->asic_specific;
8228
8229         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8230                 return 0;
8231
8232         return hl_fw_send_heartbeat(hdev);
8233 }
8234
8235 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8236 {
8237         struct gaudi_device *gaudi = hdev->asic_specific;
8238         struct asic_fixed_properties *prop = &hdev->asic_prop;
8239         int rc;
8240
8241         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8242                 return 0;
8243
8244         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8245                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8246                                         mmCPU_BOOT_ERR1);
8247         if (rc)
8248                 return rc;
8249
8250         if (!strlen(prop->cpucp_info.card_name))
8251                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8252                                 CARD_NAME_MAX_LEN);
8253
8254         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8255
8256         set_default_power_values(hdev);
8257
8258         hdev->max_power = prop->max_power_default;
8259
8260         return 0;
8261 }
8262
8263 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8264                                         u8 mask_len, struct seq_file *s)
8265 {
8266         struct gaudi_device *gaudi = hdev->asic_specific;
8267         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8268         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8269         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8270         unsigned long *mask = (unsigned long *)mask_arr;
8271         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8272         bool is_idle = true, is_eng_idle, is_slave;
8273         u64 offset;
8274         int i, dma_id, port;
8275
8276         mutex_lock(&gaudi->clk_gate_mutex);
8277
8278         hdev->asic_funcs->disable_clock_gating(hdev);
8279
8280         if (s)
8281                 seq_puts(s,
8282                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8283                         "---  -------  ------------  ----------  -------------\n");
8284
8285         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8286                 dma_id = gaudi_dma_assignment[i];
8287                 offset = dma_id * DMA_QMAN_OFFSET;
8288
8289                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8290                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8291                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8292                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8293                                 IS_DMA_IDLE(dma_core_sts0);
8294                 is_idle &= is_eng_idle;
8295
8296                 if (mask && !is_eng_idle)
8297                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8298                 if (s)
8299                         seq_printf(s, fmt, dma_id,
8300                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8301                                 qm_cgm_sts, dma_core_sts0);
8302         }
8303
8304         if (s)
8305                 seq_puts(s,
8306                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8307                         "---  -------  ------------  ----------  ----------\n");
8308
8309         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8310                 offset = i * TPC_QMAN_OFFSET;
8311                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8312                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8313                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8314                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8315                                 IS_TPC_IDLE(tpc_cfg_sts);
8316                 is_idle &= is_eng_idle;
8317
8318                 if (mask && !is_eng_idle)
8319                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8320                 if (s)
8321                         seq_printf(s, fmt, i,
8322                                 is_eng_idle ? "Y" : "N",
8323                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8324         }
8325
8326         if (s)
8327                 seq_puts(s,
8328                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8329                         "---  -------  ------------  ----------  -----------\n");
8330
8331         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8332                 offset = i * MME_QMAN_OFFSET;
8333                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8334                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8335
8336                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8337                 is_slave = i % 2;
8338                 if (!is_slave) {
8339                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8340                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8341                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8342                 }
8343
8344                 is_idle &= is_eng_idle;
8345
8346                 if (mask && !is_eng_idle)
8347                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8348                 if (s) {
8349                         if (!is_slave)
8350                                 seq_printf(s, fmt, i,
8351                                         is_eng_idle ? "Y" : "N",
8352                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8353                         else
8354                                 seq_printf(s, mme_slave_fmt, i,
8355                                         is_eng_idle ? "Y" : "N", "-",
8356                                         "-", mme_arch_sts);
8357                 }
8358         }
8359
8360         if (s)
8361                 seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8362                                 "---  -------  ------------  ----------\n");
8363
8364         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8365                 offset = i * NIC_MACRO_QMAN_OFFSET;
8366                 port = 2 * i;
8367                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8368                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8369                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8370                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8371                         is_idle &= is_eng_idle;
8372
8373                         if (mask && !is_eng_idle)
8374                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8375                         if (s)
8376                                 seq_printf(s, nic_fmt, port,
8377                                                 is_eng_idle ? "Y" : "N",
8378                                                 qm_glbl_sts0, qm_cgm_sts);
8379                 }
8380
8381                 port = 2 * i + 1;
8382                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8383                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8384                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8385                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8386                         is_idle &= is_eng_idle;
8387
8388                         if (mask && !is_eng_idle)
8389                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8390                         if (s)
8391                                 seq_printf(s, nic_fmt, port,
8392                                                 is_eng_idle ? "Y" : "N",
8393                                                 qm_glbl_sts0, qm_cgm_sts);
8394                 }
8395         }
8396
8397         if (s)
8398                 seq_puts(s, "\n");
8399
8400         hdev->asic_funcs->set_clock_gating(hdev);
8401
8402         mutex_unlock(&gaudi->clk_gate_mutex);
8403
8404         return is_idle;
8405 }
8406
8407 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8408         __acquires(&gaudi->hw_queues_lock)
8409 {
8410         struct gaudi_device *gaudi = hdev->asic_specific;
8411
8412         spin_lock(&gaudi->hw_queues_lock);
8413 }
8414
8415 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8416         __releases(&gaudi->hw_queues_lock)
8417 {
8418         struct gaudi_device *gaudi = hdev->asic_specific;
8419
8420         spin_unlock(&gaudi->hw_queues_lock);
8421 }
8422
8423 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8424 {
8425         return hdev->pdev->device;
8426 }
8427
8428 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8429                                 size_t max_size)
8430 {
8431         struct gaudi_device *gaudi = hdev->asic_specific;
8432
8433         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8434                 return 0;
8435
8436         return hl_fw_get_eeprom_data(hdev, data, max_size);
8437 }
8438
8439 /*
8440  * this function should be used only during initialization and/or after reset,
8441  * when there are no active users.
8442  */
8443 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8444                                 u32 tpc_id)
8445 {
8446         struct gaudi_device *gaudi = hdev->asic_specific;
8447         u64 kernel_timeout;
8448         u32 status, offset;
8449         int rc;
8450
8451         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8452
8453         if (hdev->pldm)
8454                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8455         else
8456                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8457
8458         mutex_lock(&gaudi->clk_gate_mutex);
8459
8460         hdev->asic_funcs->disable_clock_gating(hdev);
8461
8462         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8463                         lower_32_bits(tpc_kernel));
8464         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8465                         upper_32_bits(tpc_kernel));
8466
8467         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8468                         lower_32_bits(tpc_kernel));
8469         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8470                         upper_32_bits(tpc_kernel));
8471         /* set a valid LUT pointer, content is of no significance */
8472         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8473                         lower_32_bits(tpc_kernel));
8474         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8475                         upper_32_bits(tpc_kernel));
8476
8477         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8478                         lower_32_bits(CFG_BASE +
8479                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8480
8481         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8482                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8483                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8484         /* wait a bit for the icache prefetch to start */
8485         usleep_range(1000, 1500);
8486
8487         /* wait until the icache prefetch has finished */
8488         rc = hl_poll_timeout(
8489                 hdev,
8490                 mmTPC0_CFG_STATUS + offset,
8491                 status,
8492                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8493                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8494                 1000,
8495                 kernel_timeout);
8496
8497         if (rc) {
8498                 dev_err(hdev->dev,
8499                         "Timeout while waiting for TPC%d icache prefetch\n",
8500                         tpc_id);
8501                 hdev->asic_funcs->set_clock_gating(hdev);
8502                 mutex_unlock(&gaudi->clk_gate_mutex);
8503                 return -EIO;
8504         }
8505
8506         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8507                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8508
8509         /* wait a bit for the engine to start executing */
8510         usleep_range(1000, 1500);
8511
8512         /* wait until engine has finished executing */
8513         rc = hl_poll_timeout(
8514                 hdev,
8515                 mmTPC0_CFG_STATUS + offset,
8516                 status,
8517                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8518                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8519                 1000,
8520                 kernel_timeout);
8521
8522         if (rc) {
8523                 dev_err(hdev->dev,
8524                         "Timeout while waiting for TPC%d vector pipe\n",
8525                         tpc_id);
8526                 hdev->asic_funcs->set_clock_gating(hdev);
8527                 mutex_unlock(&gaudi->clk_gate_mutex);
8528                 return -EIO;
8529         }
8530
8531         rc = hl_poll_timeout(
8532                 hdev,
8533                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8534                 status,
8535                 (status == 0),
8536                 1000,
8537                 kernel_timeout);
8538
8539         hdev->asic_funcs->set_clock_gating(hdev);
8540         mutex_unlock(&gaudi->clk_gate_mutex);
8541
8542         if (rc) {
8543                 dev_err(hdev->dev,
8544                         "Timeout while waiting for TPC%d kernel to execute\n",
8545                         tpc_id);
8546                 return -EIO;
8547         }
8548
8549         return 0;
8550 }
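
/*
 * Summary of the kernel-run sequence above (for orientation only):
 *   1. Point KERNEL_BASE, ICACHE_BASE and a don't-care LUT base at the kernel.
 *   2. Set the QM sync object address and issue ICACHE_INVALIDATE +
 *      ICACHE_PREFETCH_64KB via TPC_CMD.
 *   3. Poll CFG_STATUS until the vector pipe is empty (prefetch done).
 *   4. Issue TPC_EXECUTE and poll CFG_STATUS again.
 *   5. Poll WQ_INFLIGHT_CNTR down to 0, i.e. the kernel has retired.
 * Clock gating is held off for the whole sequence via clk_gate_mutex.
 */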
8551
8552 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8553                 struct hl_ctx *ctx)
8554 {
8555         struct gaudi_device *gaudi = hdev->asic_specific;
8556         int min_alloc_order, rc, collective_cb_size;
8557
8558         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8559                 return 0;
8560
8561         hdev->internal_cb_pool_virt_addr =
8562                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8563                                         HOST_SPACE_INTERNAL_CB_SZ,
8564                                         &hdev->internal_cb_pool_dma_addr,
8565                                         GFP_KERNEL | __GFP_ZERO);
8566
8567         if (!hdev->internal_cb_pool_virt_addr)
8568                 return -ENOMEM;
8569
8570         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8571                         sizeof(struct packet_fence);
8572         min_alloc_order = ilog2(collective_cb_size);
8573
8574         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8575         if (!hdev->internal_cb_pool) {
8576                 dev_err(hdev->dev,
8577                         "Failed to create internal CB pool\n");
8578                 rc = -ENOMEM;
8579                 goto free_internal_cb_pool;
8580         }
8581
8582         rc = gen_pool_add(hdev->internal_cb_pool,
8583                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8584                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8585         if (rc) {
8586                 dev_err(hdev->dev,
8587                         "Failed to add memory to internal CB pool\n");
8588                 rc = -EFAULT;
8589                 goto destroy_internal_cb_pool;
8590         }
8591
8592         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8593                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8594                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8595
8596         if (!hdev->internal_cb_va_base) {
8597                 rc = -ENOMEM;
8598                 goto destroy_internal_cb_pool;
8599         }
8600
8601         mutex_lock(&ctx->mmu_lock);
8602         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8603                         hdev->internal_cb_pool_dma_addr,
8604                         HOST_SPACE_INTERNAL_CB_SZ);
8605
8606         hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
8607         mutex_unlock(&ctx->mmu_lock);
8608
8609         if (rc)
8610                 goto unreserve_internal_cb_pool;
8611
8612         return 0;
8613
8614 unreserve_internal_cb_pool:
8615         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8616                         HOST_SPACE_INTERNAL_CB_SZ);
8617 destroy_internal_cb_pool:
8618         gen_pool_destroy(hdev->internal_cb_pool);
8619 free_internal_cb_pool:
8620         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8621                         HOST_SPACE_INTERNAL_CB_SZ,
8622                         hdev->internal_cb_pool_virt_addr,
8623                         hdev->internal_cb_pool_dma_addr);
8624
8625         return rc;
8626 }
8627
8628 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8629                 struct hl_ctx *ctx)
8630 {
8631         struct gaudi_device *gaudi = hdev->asic_specific;
8632
8633         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8634                 return;
8635
8636         mutex_lock(&ctx->mmu_lock);
8637         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8638                         HOST_SPACE_INTERNAL_CB_SZ);
8639         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8640                         HOST_SPACE_INTERNAL_CB_SZ);
8641         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
8642         mutex_unlock(&ctx->mmu_lock);
8643
8644         gen_pool_destroy(hdev->internal_cb_pool);
8645
8646         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8647                         HOST_SPACE_INTERNAL_CB_SZ,
8648                         hdev->internal_cb_pool_virt_addr,
8649                         hdev->internal_cb_pool_dma_addr);
8650 }
8651
8652 static int gaudi_ctx_init(struct hl_ctx *ctx)
8653 {
8654         if (ctx->asid == HL_KERNEL_ASID_ID)
8655                 return 0;
8656
8657         gaudi_mmu_prepare(ctx->hdev, ctx->asid);
8658         return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8659 }
8660
8661 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8662 {
8663         if (ctx->asid == HL_KERNEL_ASID_ID)
8664                 return;
8665
8666         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8667 }
8668
8669 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8670 {
8671         return gaudi_cq_assignment[cq_idx];
8672 }
8673
8674 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8675 {
8676         return sizeof(struct packet_msg_short) +
8677                         sizeof(struct packet_msg_prot) * 2;
8678 }
8679
8680 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8681 {
8682         return sizeof(struct packet_msg_short) * 4 +
8683                         sizeof(struct packet_fence) +
8684                         sizeof(struct packet_msg_prot) * 2;
8685 }
8686
8687 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8688                                 u32 size, bool eb)
8689 {
8690         struct hl_cb *cb = (struct hl_cb *) data;
8691         struct packet_msg_short *pkt;
8692         u32 value, ctl, pkt_size = sizeof(*pkt);
8693
8694         pkt = cb->kernel_address + size;
8695         memset(pkt, 0, pkt_size);
8696
8697         /* Inc by 1, Mode ADD */
8698         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8699         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8700
8701         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8702         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8703         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8704         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8705         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8706         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8707         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8708
8709         pkt->value = cpu_to_le32(value);
8710         pkt->ctl = cpu_to_le32(ctl);
8711
8712         return size + pkt_size;
8713 }
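
/*
 * The signal CB built above is a single MSG_SHORT against the W_S SOB base:
 * value = +1 in ADD mode, address = sob_id * 4. Together with the two
 * MSG_PROT packets accounted for in gaudi_get_signal_cb_size() (assumed to
 * be appended by the common queue code, not here), this matches the size
 * reported to the core driver.
 */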
8714
8715 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8716                                         u16 addr)
8717 {
8718         u32 ctl, pkt_size = sizeof(*pkt);
8719
8720         memset(pkt, 0, pkt_size);
8721
8722         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8723         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8724         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8725         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8726         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8727                 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* only the last pkt sets MB */
8728
8729         pkt->value = cpu_to_le32(value);
8730         pkt->ctl = cpu_to_le32(ctl);
8731
8732         return pkt_size;
8733 }
8734
8735 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8736                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8737                 u16 sob_val, u16 mon_id)
8738 {
8739         u64 monitor_base;
8740         u32 ctl, value, pkt_size = sizeof(*pkt);
8741         u16 msg_addr_offset;
8742         u8 mask;
8743
8744         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8745                 dev_err(hdev->dev,
8746                         "sob_base %u (mask %#x) is not valid\n",
8747                         sob_base, sob_mask);
8748                 return 0;
8749         }
8750
8751         /*
8752          * monitor_base should be the content of the base0 address registers,
8753          * so it will be added to the msg short offsets
8754          */
8755         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8756
8757         msg_addr_offset =
8758                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8759                                 monitor_base;
8760
8761         memset(pkt, 0, pkt_size);
8762
8763         /* Monitor config packet: bind the monitor to a sync object */
8764         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8765         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8766         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8767                         0); /* GREATER OR EQUAL */
8768         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8769
8770         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8771         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8772         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8773         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8774         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8775         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8776         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8777
8778         pkt->value = cpu_to_le32(value);
8779         pkt->ctl = cpu_to_le32(ctl);
8780
8781         return pkt_size;
8782 }
8783
8784 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8785 {
8786         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8787
8788         memset(pkt, 0, pkt_size);
8789
8790         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8791         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8792         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8793
8794         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8795         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8796         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8797         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8798
8799         pkt->cfg = cpu_to_le32(cfg);
8800         pkt->ctl = cpu_to_le32(ctl);
8801
8802         return pkt_size;
8803 }
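
/*
 * Reading of the fence packet above (hedged, based on the field names): the
 * CP stalls on fence counter ID 2 until it reaches the target value of 1,
 * then decrements it by 1. This is what turns the armed monitor's payload
 * write into an actual wait on the queue.
 */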
8804
8805 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8806 {
8807         u32 offset, nic_index;
8808
8809         switch (queue_id) {
8810         case GAUDI_QUEUE_ID_DMA_0_0:
8811                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8812                 break;
8813         case GAUDI_QUEUE_ID_DMA_0_1:
8814                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8815                 break;
8816         case GAUDI_QUEUE_ID_DMA_0_2:
8817                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8818                 break;
8819         case GAUDI_QUEUE_ID_DMA_0_3:
8820                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8821                 break;
8822         case GAUDI_QUEUE_ID_DMA_1_0:
8823                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8824                 break;
8825         case GAUDI_QUEUE_ID_DMA_1_1:
8826                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8827                 break;
8828         case GAUDI_QUEUE_ID_DMA_1_2:
8829                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8830                 break;
8831         case GAUDI_QUEUE_ID_DMA_1_3:
8832                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8833                 break;
8834         case GAUDI_QUEUE_ID_DMA_5_0:
8835                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8836                 break;
8837         case GAUDI_QUEUE_ID_DMA_5_1:
8838                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8839                 break;
8840         case GAUDI_QUEUE_ID_DMA_5_2:
8841                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8842                 break;
8843         case GAUDI_QUEUE_ID_DMA_5_3:
8844                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8845                 break;
8846         case GAUDI_QUEUE_ID_TPC_7_0:
8847                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8848                 break;
8849         case GAUDI_QUEUE_ID_TPC_7_1:
8850                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8851                 break;
8852         case GAUDI_QUEUE_ID_TPC_7_2:
8853                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8854                 break;
8855         case GAUDI_QUEUE_ID_TPC_7_3:
8856                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8857                 break;
8858         case GAUDI_QUEUE_ID_NIC_0_0:
8859         case GAUDI_QUEUE_ID_NIC_1_0:
8860         case GAUDI_QUEUE_ID_NIC_2_0:
8861         case GAUDI_QUEUE_ID_NIC_3_0:
8862         case GAUDI_QUEUE_ID_NIC_4_0:
8863         case GAUDI_QUEUE_ID_NIC_5_0:
8864         case GAUDI_QUEUE_ID_NIC_6_0:
8865         case GAUDI_QUEUE_ID_NIC_7_0:
8866         case GAUDI_QUEUE_ID_NIC_8_0:
8867         case GAUDI_QUEUE_ID_NIC_9_0:
8868                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8869                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8870                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8871                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8872                 break;
8873         case GAUDI_QUEUE_ID_NIC_0_1:
8874         case GAUDI_QUEUE_ID_NIC_1_1:
8875         case GAUDI_QUEUE_ID_NIC_2_1:
8876         case GAUDI_QUEUE_ID_NIC_3_1:
8877         case GAUDI_QUEUE_ID_NIC_4_1:
8878         case GAUDI_QUEUE_ID_NIC_5_1:
8879         case GAUDI_QUEUE_ID_NIC_6_1:
8880         case GAUDI_QUEUE_ID_NIC_7_1:
8881         case GAUDI_QUEUE_ID_NIC_8_1:
8882         case GAUDI_QUEUE_ID_NIC_9_1:
8883                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8884                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8885                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8886                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8887                 break;
8888         case GAUDI_QUEUE_ID_NIC_0_2:
8889         case GAUDI_QUEUE_ID_NIC_1_2:
8890         case GAUDI_QUEUE_ID_NIC_2_2:
8891         case GAUDI_QUEUE_ID_NIC_3_2:
8892         case GAUDI_QUEUE_ID_NIC_4_2:
8893         case GAUDI_QUEUE_ID_NIC_5_2:
8894         case GAUDI_QUEUE_ID_NIC_6_2:
8895         case GAUDI_QUEUE_ID_NIC_7_2:
8896         case GAUDI_QUEUE_ID_NIC_8_2:
8897         case GAUDI_QUEUE_ID_NIC_9_2:
8898                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8899                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8900                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8901                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8902                 break;
8903         case GAUDI_QUEUE_ID_NIC_0_3:
8904         case GAUDI_QUEUE_ID_NIC_1_3:
8905         case GAUDI_QUEUE_ID_NIC_2_3:
8906         case GAUDI_QUEUE_ID_NIC_3_3:
8907         case GAUDI_QUEUE_ID_NIC_4_3:
8908         case GAUDI_QUEUE_ID_NIC_5_3:
8909         case GAUDI_QUEUE_ID_NIC_6_3:
8910         case GAUDI_QUEUE_ID_NIC_7_3:
8911         case GAUDI_QUEUE_ID_NIC_8_3:
8912         case GAUDI_QUEUE_ID_NIC_9_3:
8913                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8914                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8915                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8916                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8917                 break;
8918         default:
8919                 return -EINVAL;
8920         }
8921
8922         *addr = CFG_BASE + offset;
8923
8924         return 0;
8925 }
8926
8927 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8928 {
8929         u64 monitor_base;
8930         u32 size = 0;
8931         u16 msg_addr_offset;
8932
8933         /*
8934          * monitor_base should be the content of the base0 address registers,
8935          * so it will be added to the msg short offsets
8936          */
8937         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8938
8939         /* First monitor config packet: low address of the sync */
8940         msg_addr_offset =
8941                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8942                                 monitor_base;
8943
8944         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8945                                         msg_addr_offset);
8946
8947         /* Second monitor config packet: high address of the sync */
8948         msg_addr_offset =
8949                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8950                                 monitor_base;
8951
8952         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8953                                         msg_addr_offset);
8954
8955         /*
8956          * Third monitor config packet: the payload, i.e. what to write when the
8957          * sync triggers
8958          */
8959         msg_addr_offset =
8960                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8961                                 monitor_base;
8962
8963         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8964
8965         return size;
8966 }
8967
8968 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8969                                 struct hl_gen_wait_properties *prop)
8970 {
8971         struct hl_cb *cb = (struct hl_cb *) prop->data;
8972         void *buf = cb->kernel_address;
8973         u64 fence_addr = 0;
8974         u32 size = prop->size;
8975
8976         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8977                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8978                                 prop->q_idx);
8979                 return 0;
8980         }
8981
8982         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8983         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8984                         prop->sob_mask, prop->sob_val, prop->mon_id);
8985         size += gaudi_add_fence_pkt(buf + size);
8986
8987         return size;
8988 }
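
/*
 * Layout of the wait CB assembled above, as a sketch:
 *   3 x MSG_SHORT  - monitor setup (payload address low/high, payload data)
 *   1 x MSG_SHORT  - arm the monitor on the SOB group/value/mask
 *   1 x FENCE      - stall until the monitor payload lands
 * which accounts for the 4 MSG_SHORT + FENCE in gaudi_get_wait_cb_size();
 * the remaining 2 MSG_PROT packets are assumed to be added elsewhere.
 */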
8989
8990 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8991 {
8992         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8993         int rc;
8994
8995         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8996                 hw_sob->sob_id);
8997
8998         rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
8999                         CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
9000                         hw_sob->sob_id * 4, 1, 0);
9001         if (rc)
9002                 dev_err(hdev->dev, "failed resetting sob %u\n", hw_sob->sob_id);
9003
9004         kref_init(&hw_sob->kref);
9005 }
9006
9007 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
9008 {
9009         if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
9010                                                         HL_POWER9_HOST_MAGIC) {
9011                 hdev->power9_64bit_dma_enable = 1;
9012                 hdev->dma_mask = 64;
9013         } else {
9014                 hdev->power9_64bit_dma_enable = 0;
9015                 hdev->dma_mask = 48;
9016         }
9017 }
9018
9019 static u64 gaudi_get_device_time(struct hl_device *hdev)
9020 {
9021         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9022
9023         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9024 }
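
/*
 * The device time is composed from the two 32-bit timestamp counter halves,
 * e.g. (illustrative values only):
 *   CNTCVU = 0x00000002, CNTCVL = 0x000186A0  ->  0x2000186A0
 */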
9025
9026 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
9027                                 u32 *block_size, u32 *block_id)
9028 {
9029         return -EPERM;
9030 }
9031
9032 static int gaudi_block_mmap(struct hl_device *hdev,
9033                                 struct vm_area_struct *vma,
9034                                 u32 block_id, u32 block_size)
9035 {
9036         return -EPERM;
9037 }
9038
9039 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
9040 {
9041         struct cpu_dyn_regs *dyn_regs =
9042                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
9043         u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
9044                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
9045                         le32_to_cpu(dyn_regs->gic_host_ints_irq);
9046
9047         WREG32(irq_handler_offset,
9048                 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
9049 }
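
/*
 * Reader's note: the register that pokes the device CPU is either the fixed
 * GIC distributor register (legacy flow, asic_prop.gic_interrupts_enable set)
 * or the offset the firmware advertised in its dynamic registers descriptor.
 * Writing the cpu_id of GAUDI_EVENT_INTS_REGISTER there signals the firmware
 * that the host is ready to receive events.
 */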
9050
9051 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
9052 {
9053         switch (pll_idx) {
9054         case HL_GAUDI_CPU_PLL: return CPU_PLL;
9055         case HL_GAUDI_PCI_PLL: return PCI_PLL;
9056         case HL_GAUDI_NIC_PLL: return NIC_PLL;
9057         case HL_GAUDI_DMA_PLL: return DMA_PLL;
9058         case HL_GAUDI_MESH_PLL: return MESH_PLL;
9059         case HL_GAUDI_MME_PLL: return MME_PLL;
9060         case HL_GAUDI_TPC_PLL: return TPC_PLL;
9061         case HL_GAUDI_IF_PLL: return IF_PLL;
9062         case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
9063         case HL_GAUDI_HBM_PLL: return HBM_PLL;
9064         default: return -EINVAL;
9065         }
9066 }
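
/*
 * Hedged usage sketch (the real caller is the common frequency/PLL code):
 * a negative return value means the generic PLL index has no Gaudi
 * counterpart and the request should be rejected.
 *
 *   rc = gaudi_map_pll_idx_to_fw_idx(pll_index);
 *   if (rc < 0)
 *           return -EINVAL;
 *   used_pll_idx = rc;      /* index understood by the firmware */
 */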
9067
9068 static int gaudi_add_sync_to_engine_map_entry(
9069         struct hl_sync_to_engine_map *map, u32 reg_value,
9070         enum hl_sync_engine_type engine_type, u32 engine_id)
9071 {
9072         struct hl_sync_to_engine_map_entry *entry;
9073
9074         /* The register value is a partial address of the sync object and
9075          * is used as its unique identifier; to obtain it, the CFG base
9076          * address is subtracted from the raw register value.
9077          */
9078         if (reg_value == 0 || reg_value == 0xffffffff)
9079                 return 0;
9080         reg_value -= (u32)CFG_BASE;
9081
9082         /* create a new hash entry */
9083         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
9084         if (!entry)
9085                 return -ENOMEM;
9086         entry->engine_type = engine_type;
9087         entry->engine_id = engine_id;
9088         entry->sync_id = reg_value;
9089         hash_add(map->tb, &entry->node, reg_value);
9090
9091         return 0;
9092 }
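
/*
 * Hedged sketch of how such entries are typically looked up later (the real
 * consumer lives in the common state-dump code): the map is keyed by the
 * CFG-relative sync object address, so a monitor arm value can be traced
 * back to the engine that signals that SOB.
 *
 *   struct hl_sync_to_engine_map_entry *entry;
 *
 *   hash_for_each_possible(map->tb, entry, node, sync_id)
 *           if (entry->sync_id == sync_id)
 *                   return entry;
 */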
9093
9094 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
9095                                 struct hl_sync_to_engine_map *map)
9096 {
9097         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9098         struct gaudi_device *gaudi = hdev->asic_specific;
9099         int i, j, rc;
9100         u32 reg_value;
9101
9102         /* Iterate over TPC engines */
9103         for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
9104                 /* TPC registers must be accessed with clock gating disabled */
9105                 mutex_lock(&gaudi->clk_gate_mutex);
9106                 hdev->asic_funcs->disable_clock_gating(hdev);
9107
9108                 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
9109                                         sds->props[SP_NEXT_TPC] * i);
9110
9111                 /* We can reenable clock_gating */
9112                 hdev->asic_funcs->set_clock_gating(hdev);
9113                 mutex_unlock(&gaudi->clk_gate_mutex);
9114
9115                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9116                                                         ENGINE_TPC, i);
9117                 if (rc)
9118                         goto free_sync_to_engine_map;
9119         }
9120
9121         /* Iterate over MME engines */
9122         for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
9123                 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
9124                         /* MME registers must be accessed with clock gating
9125                          * disabled
9126                          */
9127                         mutex_lock(&gaudi->clk_gate_mutex);
9128                         hdev->asic_funcs->disable_clock_gating(hdev);
9129
9130                         reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
9131                                                 sds->props[SP_NEXT_MME] * i +
9132                                                 j * sizeof(u32));
9133
9134                         /* We can reenable clock_gating */
9135                         hdev->asic_funcs->set_clock_gating(hdev);
9136                         mutex_unlock(&gaudi->clk_gate_mutex);
9137
9138                         rc = gaudi_add_sync_to_engine_map_entry(
9139                                 map, reg_value, ENGINE_MME,
9140                                 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
9141                         if (rc)
9142                                 goto free_sync_to_engine_map;
9143                 }
9144         }
9145
9146         /* Iterate over DMA engines */
9147         for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
9148                 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
9149                                         sds->props[SP_DMA_QUEUES_OFFSET] * i);
9150                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9151                                                         ENGINE_DMA, i);
9152                 if (rc)
9153                         goto free_sync_to_engine_map;
9154         }
9155
9156         return 0;
9157
9158 free_sync_to_engine_map:
9159         hl_state_dump_free_sync_to_engine_map(map);
9160
9161         return rc;
9162 }
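
/*
 * Reader's note: TPC and MME configuration space must not be read while
 * clock gating is active, hence every RREG32 above is bracketed by
 * disable_clock_gating()/set_clock_gating() under clk_gate_mutex. DMA
 * configuration space has no such restriction, so its loop reads the sync
 * object registers directly.
 */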
9163
9164 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
9165 {
9166         return FIELD_GET(
9167                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
9168                 mon->status);
9169 }
9170
9171 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9172                                 struct hl_device *hdev,
9173                                 struct hl_mon_state_dump *mon)
9174 {
9175         const char *name;
9176         char scratch_buf1[BIN_REG_STRING_SIZE],
9177                 scratch_buf2[BIN_REG_STRING_SIZE];
9178
9179         name = hl_state_dump_get_monitor_name(hdev, mon);
9180         if (!name)
9181                 name = "";
9182
9183         return hl_snprintf_resize(
9184                 buf, size, offset,
9185                 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s",
9186                 mon->id, name,
9187                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9188                                 mon->arm_data),
9189                 hl_format_as_binary(
9190                         scratch_buf1, sizeof(scratch_buf1),
9191                         FIELD_GET(
9192                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9193                                 mon->arm_data)),
9194                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9195                                 mon->arm_data),
9196                 mon->wr_data,
9197                 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9198                 hl_format_as_binary(
9199                         scratch_buf2, sizeof(scratch_buf2),
9200                         FIELD_GET(
9201                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9202                                 mon->status)));
9203 }
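
/*
 * Reader's note on the printed fields: from mon->arm_data, SID is the sync
 * group id, MASK selects which SOBs of the group participate (rendered as a
 * binary string) and SOD is the value the group has to reach; mon->wr_data
 * and the combined wr_addr_high/wr_addr_low form the payload written when
 * the monitor fires, and the PENDING field of mon->status indicates the
 * monitor is still waiting.
 */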
9204
9205
9206 static int gaudi_print_fences_single_engine(
9207         struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9208         enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9209         size_t *size, size_t *offset)
9210 {
9211         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9212         int rc = -ENOMEM, i;
9213         u32 *statuses, *fences;
9214
9215         statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9216                         sizeof(*statuses), GFP_KERNEL);
9217         if (!statuses)
9218                 goto out;
9219
9220         fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9221                                 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9222                          sizeof(*fences), GFP_KERNEL);
9223         if (!fences)
9224                 goto free_status;
9225
9226         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9227                 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9228
9229         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9230                                 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9231                 fences[i] = RREG32(base_offset + i * sizeof(u32));
9232
9233         /* The actual print */
9234         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9235                 u32 fence_id;
9236                 u64 fence_cnt, fence_rdata;
9237                 const char *engine_name;
9238
9239                 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9240                         statuses[i]))
9241                         continue;
9242
9243                 fence_id =
9244                         FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9245                 fence_cnt = base_offset + CFG_BASE +
9246                         sizeof(u32) *
9247                         (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9248                 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9249                                 sds->props[SP_FENCE0_RDATA_OFFSET];
9250                 engine_name = hl_sync_engine_to_string(engine_type);
9251
9252                 rc = hl_snprintf_resize(
9253                         buf, size, offset,
9254                         "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9255                         engine_name, engine_id,
9256                         i, fence_id,
9257                         fence_cnt, engine_name, engine_id, fence_id, i,
9258                         fence_rdata, engine_name, engine_id, fence_id, i,
9259                         fences[fence_id],
9260                         statuses[i]);
9261                 if (rc)
9262                         goto free_fences;
9263         }
9264
9265         rc = 0;
9266
9267 free_fences:
9268         kfree(fences);
9269 free_status:
9270         kfree(statuses);
9271 out:
9272         return rc;
9273 }
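
/*
 * Reader's note: for a stream i blocked on fence f, the code above
 * reconstructs the device addresses of the relevant QMAN registers as
 *
 *   CP_FENCEf_CNT_i   = CFG_BASE + base_offset
 *                       + sizeof(u32) * (i + f * num_queues)
 *   CP_FENCEf_RDATA_i = CP_FENCEf_CNT_i - FENCE0_CNT_OFFSET
 *                       + FENCE0_RDATA_OFFSET
 *
 * so the printed "cnt"/"rdata" values are register addresses, while "value"
 * is the fence counter read earlier and "cp_status" is the raw CP_STS word.
 */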
9274
9275
9276 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9277         .monitor_valid = gaudi_monitor_valid,
9278         .print_single_monitor = gaudi_print_single_monitor,
9279         .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9280         .print_fences_single_engine = gaudi_print_fences_single_engine,
9281 };
9282
9283 static void gaudi_state_dump_init(struct hl_device *hdev)
9284 {
9285         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9286         int i;
9287
9288         for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9289                 hash_add(sds->so_id_to_str_tb,
9290                         &gaudi_so_id_to_str[i].node,
9291                         gaudi_so_id_to_str[i].id);
9292
9293         for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9294                 hash_add(sds->monitor_id_to_str_tb,
9295                         &gaudi_monitor_id_to_str[i].node,
9296                         gaudi_monitor_id_to_str[i].id);
9297
9298         sds->props = gaudi_state_dump_specs_props;
9299
9300         sds->sync_namager_names = gaudi_sync_manager_names;
9301
9302         sds->funcs = gaudi_state_dump_funcs;
9303 }
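
/*
 * Reader's note: gaudi_state_dump_init() only plugs the Gaudi specifics into
 * the common state-dump engine: hash tables translating SOB and monitor ids
 * into human-readable names, the property table
 * (gaudi_state_dump_specs_props), the sync manager names and the callbacks
 * from gaudi_state_dump_funcs above.
 */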
9304
9305 static const struct hl_asic_funcs gaudi_funcs = {
9306         .early_init = gaudi_early_init,
9307         .early_fini = gaudi_early_fini,
9308         .late_init = gaudi_late_init,
9309         .late_fini = gaudi_late_fini,
9310         .sw_init = gaudi_sw_init,
9311         .sw_fini = gaudi_sw_fini,
9312         .hw_init = gaudi_hw_init,
9313         .hw_fini = gaudi_hw_fini,
9314         .halt_engines = gaudi_halt_engines,
9315         .suspend = gaudi_suspend,
9316         .resume = gaudi_resume,
9317         .mmap = gaudi_mmap,
9318         .ring_doorbell = gaudi_ring_doorbell,
9319         .pqe_write = gaudi_pqe_write,
9320         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9321         .asic_dma_free_coherent = gaudi_dma_free_coherent,
9322         .scrub_device_mem = gaudi_scrub_device_mem,
9323         .get_int_queue_base = gaudi_get_int_queue_base,
9324         .test_queues = gaudi_test_queues,
9325         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9326         .asic_dma_pool_free = gaudi_dma_pool_free,
9327         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9328         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9329         .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9330         .cs_parser = gaudi_cs_parser,
9331         .asic_dma_map_sg = gaudi_dma_map_sg,
9332         .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9333         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9334         .update_eq_ci = gaudi_update_eq_ci,
9335         .context_switch = gaudi_context_switch,
9336         .restore_phase_topology = gaudi_restore_phase_topology,
9337         .debugfs_read32 = gaudi_debugfs_read32,
9338         .debugfs_write32 = gaudi_debugfs_write32,
9339         .debugfs_read64 = gaudi_debugfs_read64,
9340         .debugfs_write64 = gaudi_debugfs_write64,
9341         .debugfs_read_dma = gaudi_debugfs_read_dma,
9342         .add_device_attr = gaudi_add_device_attr,
9343         .handle_eqe = gaudi_handle_eqe,
9344         .set_pll_profile = gaudi_set_pll_profile,
9345         .get_events_stat = gaudi_get_events_stat,
9346         .read_pte = gaudi_read_pte,
9347         .write_pte = gaudi_write_pte,
9348         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9349         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9350         .send_heartbeat = gaudi_send_heartbeat,
9351         .set_clock_gating = gaudi_set_clock_gating,
9352         .disable_clock_gating = gaudi_disable_clock_gating,
9353         .debug_coresight = gaudi_debug_coresight,
9354         .is_device_idle = gaudi_is_device_idle,
9355         .soft_reset_late_init = gaudi_soft_reset_late_init,
9356         .hw_queues_lock = gaudi_hw_queues_lock,
9357         .hw_queues_unlock = gaudi_hw_queues_unlock,
9358         .get_pci_id = gaudi_get_pci_id,
9359         .get_eeprom_data = gaudi_get_eeprom_data,
9360         .send_cpu_message = gaudi_send_cpu_message,
9361         .pci_bars_map = gaudi_pci_bars_map,
9362         .init_iatu = gaudi_init_iatu,
9363         .rreg = hl_rreg,
9364         .wreg = hl_wreg,
9365         .halt_coresight = gaudi_halt_coresight,
9366         .ctx_init = gaudi_ctx_init,
9367         .ctx_fini = gaudi_ctx_fini,
9368         .get_clk_rate = gaudi_get_clk_rate,
9369         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9370         .load_firmware_to_device = gaudi_load_firmware_to_device,
9371         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9372         .get_signal_cb_size = gaudi_get_signal_cb_size,
9373         .get_wait_cb_size = gaudi_get_wait_cb_size,
9374         .gen_signal_cb = gaudi_gen_signal_cb,
9375         .gen_wait_cb = gaudi_gen_wait_cb,
9376         .reset_sob = gaudi_reset_sob,
9377         .reset_sob_group = gaudi_reset_sob_group,
9378         .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9379         .get_device_time = gaudi_get_device_time,
9380         .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9381         .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9382         .scramble_addr = hl_mmu_scramble_addr,
9383         .descramble_addr = hl_mmu_descramble_addr,
9384         .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9385         .get_hw_block_id = gaudi_get_hw_block_id,
9386         .hw_block_mmap = gaudi_block_mmap,
9387         .enable_events_from_fw = gaudi_enable_events_from_fw,
9388         .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9389         .init_firmware_loader = gaudi_init_firmware_loader,
9390         .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9391         .state_dump_init = gaudi_state_dump_init
9392 };
9393
9394 /**
9395  * gaudi_set_asic_funcs - set GAUDI function pointers
9396  *
9397  * @hdev: pointer to hl_device structure
9398  *
9399  */
9400 void gaudi_set_asic_funcs(struct hl_device *hdev)
9401 {
9402         hdev->asic_funcs = &gaudi_funcs;
9403 }