habanalabs/gaudi: minimize number of register reads
[linux-2.6-microblaze.git] / drivers/misc/habanalabs/gaudi/gaudi.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2020 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse the CB, but WREG should be
43  *                      allowed because of TDMA (tensor DMA). Hence, WREG is
44  *                      never secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61
62 #define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000         /* 1s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN            20
86
87 #define GAUDI_CB_POOL_CB_CNT            512
88 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
97
98 #define GAUDI_ARB_WDT_TIMEOUT           0x1000000
99
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
101                 BIT(GAUDI_ENGINE_ID_MME_0) |\
102                 BIT(GAUDI_ENGINE_ID_MME_2) |\
103                 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
104
105 #define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
106
107 #define GAUDI_PLL_MAX 10
108
109 #define BIN_REG_STRING_SIZE     sizeof("0b10101010101010101010101010101010")
110
111 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
112                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
113                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
114                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
115                 "gaudi cpu eq"
116 };
117
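/* Map each logical DMA channel (2 PCI channels + 6 HBM channels) to its
 * physical DMA engine ID.
 */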
118 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
119         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
120         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
121         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
122         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
123         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
124         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
125         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
126         [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
127 };
128
129 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
130         [0] = GAUDI_QUEUE_ID_DMA_0_0,
131         [1] = GAUDI_QUEUE_ID_DMA_0_1,
132         [2] = GAUDI_QUEUE_ID_DMA_0_2,
133         [3] = GAUDI_QUEUE_ID_DMA_0_3,
134         [4] = GAUDI_QUEUE_ID_DMA_1_0,
135         [5] = GAUDI_QUEUE_ID_DMA_1_1,
136         [6] = GAUDI_QUEUE_ID_DMA_1_2,
137         [7] = GAUDI_QUEUE_ID_DMA_1_3,
138 };
139
140 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
141         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
142         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
143         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
144         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
145         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
146         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
147         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
148         [PACKET_FENCE]          = sizeof(struct packet_fence),
149         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
150         [PACKET_NOP]            = sizeof(struct packet_nop),
151         [PACKET_STOP]           = sizeof(struct packet_stop),
152         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
153         [PACKET_WAIT]           = sizeof(struct packet_wait),
154         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
155 };
156
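/* Return true only for packet IDs that are valid Gaudi CP packets */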
157 static inline bool validate_packet_id(enum packet_id id)
158 {
159         switch (id) {
160         case PACKET_WREG_32:
161         case PACKET_WREG_BULK:
162         case PACKET_MSG_LONG:
163         case PACKET_MSG_SHORT:
164         case PACKET_CP_DMA:
165         case PACKET_REPEAT:
166         case PACKET_MSG_PROT:
167         case PACKET_FENCE:
168         case PACKET_LIN_DMA:
169         case PACKET_NOP:
170         case PACKET_STOP:
171         case PACKET_ARB_POINT:
172         case PACKET_WAIT:
173         case PACKET_LOAD_AND_EXE:
174                 return true;
175         default:
176                 return false;
177         }
178 }
179
180 static const char * const
181 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
182         "tpc_address_exceed_slm",
183         "tpc_div_by_0",
184         "tpc_spu_mac_overflow",
185         "tpc_spu_addsub_overflow",
186         "tpc_spu_abs_overflow",
187         "tpc_spu_fp_dst_nan_inf",
188         "tpc_spu_fp_dst_denorm",
189         "tpc_vpu_mac_overflow",
190         "tpc_vpu_addsub_overflow",
191         "tpc_vpu_abs_overflow",
192         "tpc_vpu_fp_dst_nan_inf",
193         "tpc_vpu_fp_dst_denorm",
194         "tpc_assertions",
195         "tpc_illegal_instruction",
196         "tpc_pc_wrap_around",
197         "tpc_qm_sw_err",
198         "tpc_hbw_rresp_err",
199         "tpc_hbw_bresp_err",
200         "tpc_lbw_rresp_err",
201         "tpc_lbw_bresp_err"
202 };
203
204 static const char * const
205 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
206         "PQ AXI HBW error",
207         "CQ AXI HBW error",
208         "CP AXI HBW error",
209         "CP error due to undefined OPCODE",
210         "CP encountered STOP OPCODE",
211         "CP AXI LBW error",
212         "CP WRREG32 or WRBULK returned error",
213         "N/A",
214         "FENCE 0 inc over max value and clipped",
215         "FENCE 1 inc over max value and clipped",
216         "FENCE 2 inc over max value and clipped",
217         "FENCE 3 inc over max value and clipped",
218         "FENCE 0 dec under min value and clipped",
219         "FENCE 1 dec under min value and clipped",
220         "FENCE 2 dec under min value and clipped",
221         "FENCE 3 dec under min value and clipped"
222 };
223
224 static const char * const
225 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
226         "Choice push while full error",
227         "Choice Q watchdog error",
228         "MSG AXI LBW returned with error"
229 };
230
231 enum gaudi_sm_sei_cause {
232         GAUDI_SM_SEI_SO_OVERFLOW,
233         GAUDI_SM_SEI_LBW_4B_UNALIGNED,
234         GAUDI_SM_SEI_AXI_RESPONSE_ERR
235 };
236
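/* Queue type (external, CPU or internal) for every Gaudi queue ID */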
237 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
238         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
239         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
240         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
241         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
242         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
243         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
244         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
245         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
246         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
247         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
248         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
249         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
250         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
251         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
252         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
253         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
254         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
255         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
256         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
257         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
258         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
259         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
304         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
305         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
306         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
307         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
308         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
309         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
310         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
311         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
312         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
313         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
314         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
315         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
316         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
317         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
318         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
319         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
320         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
321         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
322         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
323         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
324         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
325         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
326         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
327         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
328         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
329         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
330         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
331         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
332         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
333         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
334         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
335         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
336         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
337         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
338         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
339         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
340         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
341         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
342         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
343         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
344         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
345         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
346         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
347         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
348         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
349         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
350         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
351 };
352
353 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
354         { .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
355         { .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
356         { .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
357         { .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
358         { .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
359         { .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
360         { .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
361         { .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
362         { .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
363         { .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
364         { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
365         { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
366         { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
367         { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
368         { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
369         { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
370         { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
371         { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
372         { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
373         { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
374         { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
375         { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
376         { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
377         { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
378         { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
379         { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
380         { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
381 };
382
383 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
384         { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
385         { .id = 201, .name = "MON_OBJ_DMA_UP_FEADBACK_RESET" },
386         { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
387         { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
388         { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
389         { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
390         { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
391         { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
392         { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
393         { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
394         { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
395 };
396
397 static s64 gaudi_state_dump_specs_props[] = {
398         [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
399         [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
400         [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
401         [SP_MON_OBJ_WR_ADDR_LOW] =
402                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
403         [SP_MON_OBJ_WR_ADDR_HIGH] =
404                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
405         [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
406         [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
407         [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
408         [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
409         [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
410         [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
411         [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
412         [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
413         [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
414         [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
415         [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
416         [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
417         [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
418         [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
419         [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
420         [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
421         [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
422         [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
423         [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
424         [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
425         [SP_FENCE0_CNT_OFFSET] =
426                 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
427         [SP_FENCE0_RDATA_OFFSET] =
428                 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
429         [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
430         [SP_NUM_CORES] = 1,
431 };
432
433 /* The order here is opposite to the order of the indexing in the h/w.
434  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
435  */
436 static const char * const gaudi_sync_manager_names[] = {
437         "SYNC_MGR_E_N",
438         "SYNC_MGR_W_N",
439         "SYNC_MGR_E_S",
440         "SYNC_MGR_W_S",
441         NULL
442 };
443
444 struct ecc_info_extract_params {
445         u64 block_address;
446         u32 num_memories;
447         bool derr;
448         bool disable_clock_gating;
449 };
450
451 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
452                                                                 u64 phys_addr);
453 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
454                                         struct hl_cs_job *job);
455 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
456                                         u32 size, u64 val);
457 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
458                                         u32 num_regs, u32 val);
459 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
460                                 u32 tpc_id);
461 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
462 static int gaudi_cpucp_info_get(struct hl_device *hdev);
463 static void gaudi_disable_clock_gating(struct hl_device *hdev);
464 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
465 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
466                                 u32 size, bool eb);
467 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
468                                 struct hl_gen_wait_properties *prop);
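/* External queues act as collective masters; the DMA5, TPC7 and NIC queues
 * act as collective slaves. All other queues don't support collective
 * operations.
 */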
469 static inline enum hl_collective_mode
470 get_collective_mode(struct hl_device *hdev, u32 queue_id)
471 {
472         if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
473                 return HL_COLLECTIVE_MASTER;
474
475         if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
476                         queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
477                 return HL_COLLECTIVE_SLAVE;
478
479         if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
480                         queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
481                 return HL_COLLECTIVE_SLAVE;
482
483         if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
484                         queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
485                 return HL_COLLECTIVE_SLAVE;
486
487         return HL_COLLECTIVE_NOT_SUPPORTED;
488 }
489
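/* Set the default max/DC power values according to the card type */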
490 static inline void set_default_power_values(struct hl_device *hdev)
491 {
492         struct asic_fixed_properties *prop = &hdev->asic_prop;
493
494         if (hdev->card_type == cpucp_card_type_pmc) {
495                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
496                 prop->dc_power_default = DC_POWER_DEFAULT_PMC;
497         } else {
498                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
499                 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
500         }
501 }
502
503 static int gaudi_set_fixed_properties(struct hl_device *hdev)
504 {
505         struct asic_fixed_properties *prop = &hdev->asic_prop;
506         u32 num_sync_stream_queues = 0;
507         int i;
508
509         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
510         prop->hw_queues_props = kcalloc(prop->max_queues,
511                         sizeof(struct hw_queue_properties),
512                         GFP_KERNEL);
513
514         if (!prop->hw_queues_props)
515                 return -ENOMEM;
516
517         for (i = 0 ; i < prop->max_queues ; i++) {
518                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
519                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
520                         prop->hw_queues_props[i].driver_only = 0;
521                         prop->hw_queues_props[i].supports_sync_stream = 1;
522                         prop->hw_queues_props[i].cb_alloc_flags =
523                                 CB_ALLOC_KERNEL;
524                         num_sync_stream_queues++;
525                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
526                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
527                         prop->hw_queues_props[i].driver_only = 1;
528                         prop->hw_queues_props[i].supports_sync_stream = 0;
529                         prop->hw_queues_props[i].cb_alloc_flags =
530                                 CB_ALLOC_KERNEL;
531                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
532                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
533                         prop->hw_queues_props[i].driver_only = 0;
534                         prop->hw_queues_props[i].supports_sync_stream = 0;
535                         prop->hw_queues_props[i].cb_alloc_flags =
536                                 CB_ALLOC_USER;
537
538                 }
539                 prop->hw_queues_props[i].collective_mode =
540                                                 get_collective_mode(hdev, i);
541         }
542
543         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
544         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
545         prop->collective_first_sob = 0;
546         prop->collective_first_mon = 0;
547
548         /* 2 SOBs per internal queue stream are reserved for collective */
549         prop->sync_stream_first_sob =
550                         ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
551                         * QMAN_STREAMS * HL_RSVD_SOBS;
552
553         /* 1 monitor per internal queue stream is reserved for collective
554          * 2 monitors per external queue stream are reserved for collective
555          */
556         prop->sync_stream_first_mon =
557                         (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
558                         (NUMBER_OF_EXT_HW_QUEUES * 2);
559
560         prop->dram_base_address = DRAM_PHYS_BASE;
561         prop->dram_size = GAUDI_HBM_SIZE_32GB;
562         prop->dram_end_address = prop->dram_base_address +
563                                         prop->dram_size;
564         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
565
566         prop->sram_base_address = SRAM_BASE_ADDR;
567         prop->sram_size = SRAM_SIZE;
568         prop->sram_end_address = prop->sram_base_address +
569                                         prop->sram_size;
570         prop->sram_user_base_address = prop->sram_base_address +
571                                         SRAM_USER_BASE_OFFSET;
572
573         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
574         if (hdev->pldm)
575                 prop->mmu_pgt_size = 0x800000; /* 8MB */
576         else
577                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
578         prop->mmu_pte_size = HL_PTE_SIZE;
579         prop->mmu_hop_table_size = HOP_TABLE_SIZE;
580         prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
581         prop->dram_page_size = PAGE_SIZE_2MB;
582         prop->dram_supports_virtual_memory = false;
583
584         prop->pmmu.hop0_shift = HOP0_SHIFT;
585         prop->pmmu.hop1_shift = HOP1_SHIFT;
586         prop->pmmu.hop2_shift = HOP2_SHIFT;
587         prop->pmmu.hop3_shift = HOP3_SHIFT;
588         prop->pmmu.hop4_shift = HOP4_SHIFT;
589         prop->pmmu.hop0_mask = HOP0_MASK;
590         prop->pmmu.hop1_mask = HOP1_MASK;
591         prop->pmmu.hop2_mask = HOP2_MASK;
592         prop->pmmu.hop3_mask = HOP3_MASK;
593         prop->pmmu.hop4_mask = HOP4_MASK;
594         prop->pmmu.start_addr = VA_HOST_SPACE_START;
595         prop->pmmu.end_addr =
596                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
597         prop->pmmu.page_size = PAGE_SIZE_4KB;
598         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
599
600         /* PMMU and HPMMU are the same except for the page size */
601         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
602         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
603
604         /* shifts and masks are the same in PMMU and DMMU */
605         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
606         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
607         prop->dmmu.end_addr = VA_HOST_SPACE_END;
608         prop->dmmu.page_size = PAGE_SIZE_2MB;
609
610         prop->cfg_size = CFG_SIZE;
611         prop->max_asid = MAX_ASID;
612         prop->num_of_events = GAUDI_EVENT_SIZE;
613         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
614
615         set_default_power_values(hdev);
616
617         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
618         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
619
620         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
621         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
622
623         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
624                                         CARD_NAME_MAX_LEN);
625
626         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
627
628         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
629                         prop->sync_stream_first_sob +
630                         (num_sync_stream_queues * HL_RSVD_SOBS);
631         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
632                         prop->sync_stream_first_mon +
633                         (num_sync_stream_queues * HL_RSVD_MONS);
634
635         prop->first_available_user_msix_interrupt = USHRT_MAX;
636
637         for (i = 0 ; i < HL_MAX_DCORES ; i++)
638                 prop->first_available_cq[i] = USHRT_MAX;
639
640         prop->fw_cpu_boot_dev_sts0_valid = false;
641         prop->fw_cpu_boot_dev_sts1_valid = false;
642         prop->hard_reset_done_by_fw = false;
643         prop->gic_interrupts_enable = true;
644
645         prop->server_type = HL_SERVER_TYPE_UNKNOWN;
646
647         return 0;
648 }
649
650 static int gaudi_pci_bars_map(struct hl_device *hdev)
651 {
652         static const char * const name[] = {"SRAM", "CFG", "HBM"};
653         bool is_wc[3] = {false, false, true};
654         int rc;
655
656         rc = hl_pci_bars_map(hdev, name, is_wc);
657         if (rc)
658                 return rc;
659
660         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
661                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
662
663         return 0;
664 }
665
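/* Re-map the HBM BAR (inbound PCI region 2) to point to @addr.
 * Returns the previous BAR base address, or U64_MAX if the region could not
 * be set or the iATU is configured by the firmware.
 */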
666 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
667 {
668         struct gaudi_device *gaudi = hdev->asic_specific;
669         struct hl_inbound_pci_region pci_region;
670         u64 old_addr = addr;
671         int rc;
672
673         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
674                 return old_addr;
675
676         if (hdev->asic_prop.iatu_done_by_fw)
677                 return U64_MAX;
678
679         /* Inbound Region 2 - Bar 4 - Point to HBM */
680         pci_region.mode = PCI_BAR_MATCH_MODE;
681         pci_region.bar = HBM_BAR_ID;
682         pci_region.addr = addr;
683         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
684         if (rc)
685                 return U64_MAX;
686
687         if (gaudi) {
688                 old_addr = gaudi->hbm_bar_cur_addr;
689                 gaudi->hbm_bar_cur_addr = addr;
690         }
691
692         return old_addr;
693 }
694
695 static int gaudi_init_iatu(struct hl_device *hdev)
696 {
697         struct hl_inbound_pci_region inbound_region;
698         struct hl_outbound_pci_region outbound_region;
699         int rc;
700
701         if (hdev->asic_prop.iatu_done_by_fw)
702                 return 0;
703
704         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
705         inbound_region.mode = PCI_BAR_MATCH_MODE;
706         inbound_region.bar = SRAM_BAR_ID;
707         inbound_region.addr = SRAM_BASE_ADDR;
708         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
709         if (rc)
710                 goto done;
711
712         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
713         inbound_region.mode = PCI_BAR_MATCH_MODE;
714         inbound_region.bar = CFG_BAR_ID;
715         inbound_region.addr = SPI_FLASH_BASE_ADDR;
716         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
717         if (rc)
718                 goto done;
719
720         /* Inbound Region 2 - Bar 4 - Point to HBM */
721         inbound_region.mode = PCI_BAR_MATCH_MODE;
722         inbound_region.bar = HBM_BAR_ID;
723         inbound_region.addr = DRAM_PHYS_BASE;
724         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
725         if (rc)
726                 goto done;
727
728         hdev->asic_funcs->set_dma_mask_from_fw(hdev);
729
730         /* Outbound Region 0 - Point to Host */
731         outbound_region.addr = HOST_PHYS_BASE;
732         outbound_region.size = HOST_PHYS_SIZE;
733         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
734
735 done:
736         return rc;
737 }
738
739 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
740 {
741         return RREG32(mmHW_STATE);
742 }
743
744 static int gaudi_early_init(struct hl_device *hdev)
745 {
746         struct asic_fixed_properties *prop = &hdev->asic_prop;
747         struct pci_dev *pdev = hdev->pdev;
748         u32 fw_boot_status;
749         int rc;
750
751         rc = gaudi_set_fixed_properties(hdev);
752         if (rc) {
753                 dev_err(hdev->dev, "Failed setting fixed properties\n");
754                 return rc;
755         }
756
757         /* Check BAR sizes */
758         if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
759                 dev_err(hdev->dev,
760                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
761                         SRAM_BAR_ID,
762                         (unsigned long long) pci_resource_len(pdev,
763                                                         SRAM_BAR_ID),
764                         SRAM_BAR_SIZE);
765                 rc = -ENODEV;
766                 goto free_queue_props;
767         }
768
769         if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
770                 dev_err(hdev->dev,
771                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
772                         CFG_BAR_ID,
773                         (unsigned long long) pci_resource_len(pdev,
774                                                                 CFG_BAR_ID),
775                         CFG_BAR_SIZE);
776                 rc = -ENODEV;
777                 goto free_queue_props;
778         }
779
780         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
781
782         /* If FW security is enabled at this point it means no access to ELBI */
783         if (hdev->asic_prop.fw_security_enabled) {
784                 hdev->asic_prop.iatu_done_by_fw = true;
785
786                 /*
787                  * The GIC security bit can ONLY be set by CPUCP, so at this
788                  * stage the decision can only be taken based on PCI ID security.
789                  */
790                 hdev->asic_prop.gic_interrupts_enable = false;
791                 goto pci_init;
792         }
793
794         rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
795                                 &fw_boot_status);
796         if (rc)
797                 goto free_queue_props;
798
799         /* Check whether FW is configuring iATU */
800         if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
801                         (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
802                 hdev->asic_prop.iatu_done_by_fw = true;
803
804 pci_init:
805         rc = hl_pci_init(hdev);
806         if (rc)
807                 goto free_queue_props;
808
809         /* Before continuing with the initialization, we need to read the preboot
810          * version to determine whether we are running with security-enabled firmware
811          */
812         rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
813                                         mmCPU_BOOT_DEV_STS0,
814                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
815                                         mmCPU_BOOT_ERR1,
816                                         GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
817         if (rc) {
818                 if (hdev->reset_on_preboot_fail)
819                         hdev->asic_funcs->hw_fini(hdev, true);
820                 goto pci_fini;
821         }
822
823         if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
824                 dev_info(hdev->dev,
825                         "H/W state is dirty, must reset before initializing\n");
826                 hdev->asic_funcs->hw_fini(hdev, true);
827         }
828
829         return 0;
830
831 pci_fini:
832         hl_pci_fini(hdev);
833 free_queue_props:
834         kfree(hdev->asic_prop.hw_queues_props);
835         return rc;
836 }
837
838 static int gaudi_early_fini(struct hl_device *hdev)
839 {
840         kfree(hdev->asic_prop.hw_queues_props);
841         hl_pci_fini(hdev);
842
843         return 0;
844 }
845
846 /**
847  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
848  *
849  * @hdev: pointer to hl_device structure
850  *
851  */
852 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
853 {
854         struct asic_fixed_properties *prop = &hdev->asic_prop;
855         u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
856         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
857         int rc;
858
859         if (hdev->asic_prop.fw_security_enabled) {
860                 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
861
862                 if (rc)
863                         return rc;
864
865                 freq = pll_freq_arr[2];
866         } else {
867                 /* Backward compatibility */
868                 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
869                 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
870                 nr = RREG32(mmPSOC_CPU_PLL_NR);
871                 nf = RREG32(mmPSOC_CPU_PLL_NF);
872                 od = RREG32(mmPSOC_CPU_PLL_OD);
873
874                 if (div_sel == DIV_SEL_REF_CLK ||
875                                 div_sel == DIV_SEL_DIVIDED_REF) {
876                         if (div_sel == DIV_SEL_REF_CLK)
877                                 freq = PLL_REF_CLK;
878                         else
879                                 freq = PLL_REF_CLK / (div_fctr + 1);
880                 } else if (div_sel == DIV_SEL_PLL_CLK ||
881                         div_sel == DIV_SEL_DIVIDED_PLL) {
882                         pll_clk = PLL_REF_CLK * (nf + 1) /
883                                         ((nr + 1) * (od + 1));
884                         if (div_sel == DIV_SEL_PLL_CLK)
885                                 freq = pll_clk;
886                         else
887                                 freq = pll_clk / (div_fctr + 1);
888                 } else {
889                         dev_warn(hdev->dev,
890                                 "Received invalid div select value: %d",
891                                 div_sel);
892                         freq = 0;
893                 }
894         }
895
896         prop->psoc_timestamp_frequency = freq;
897         prop->psoc_pci_pll_nr = nr;
898         prop->psoc_pci_pll_nf = nf;
899         prop->psoc_pci_pll_od = od;
900         prop->psoc_pci_pll_div_factor = div_fctr;
901
902         return 0;
903 }
904
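/* Build a LIN_DMA packet that copies the TPC kernel image from host memory
 * to the SRAM user area, send it on QMAN0 and then run the kernel on all
 * TPC engines.
 */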
905 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
906                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
907 {
908         struct asic_fixed_properties *prop = &hdev->asic_prop;
909         struct packet_lin_dma *init_tpc_mem_pkt;
910         struct hl_cs_job *job;
911         struct hl_cb *cb;
912         u64 dst_addr;
913         u32 cb_size, ctl;
914         u8 tpc_id;
915         int rc;
916
917         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
918         if (!cb)
919                 return -EFAULT;
920
921         init_tpc_mem_pkt = cb->kernel_address;
922         cb_size = sizeof(*init_tpc_mem_pkt);
923         memset(init_tpc_mem_pkt, 0, cb_size);
924
925         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
926
927         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
928         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
929         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
930         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
931
932         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
933
934         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
935         dst_addr = (prop->sram_user_base_address &
936                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
937                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
938         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
939
940         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
941         if (!job) {
942                 dev_err(hdev->dev, "Failed to allocate a new job\n");
943                 rc = -ENOMEM;
944                 goto release_cb;
945         }
946
947         job->id = 0;
948         job->user_cb = cb;
949         atomic_inc(&job->user_cb->cs_cnt);
950         job->user_cb_size = cb_size;
951         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
952         job->patched_cb = job->user_cb;
953         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
954
955         hl_debugfs_add_job(hdev, job);
956
957         rc = gaudi_send_job_on_qman0(hdev, job);
958
959         if (rc)
960                 goto free_job;
961
962         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
963                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
964                 if (rc)
965                         break;
966         }
967
968 free_job:
969         hl_userptr_delete_list(hdev, &job->userptr_list);
970         hl_debugfs_remove_job(hdev, job);
971         kfree(job);
972         atomic_dec(&cb->cs_cnt);
973
974 release_cb:
975         hl_cb_put(cb);
976         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
977
978         return rc;
979 }
980
981 /*
982  * gaudi_init_tpc_mem() - Initialize TPC memories.
983  * @hdev: Pointer to hl_device structure.
984  *
985  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
986  *
987  * Return: 0 for success, negative value for error.
988  */
989 static int gaudi_init_tpc_mem(struct hl_device *hdev)
990 {
991         const struct firmware *fw;
992         size_t fw_size;
993         void *cpu_addr;
994         dma_addr_t dma_handle;
995         int rc, count = 5;
996
997 again:
998         rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
999         if (rc == -EINTR && count-- > 0) {
1000                 msleep(50);
1001                 goto again;
1002         }
1003
1004         if (rc) {
1005                 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1006                                 GAUDI_TPC_FW_FILE);
1007                 goto out;
1008         }
1009
1010         fw_size = fw->size;
1011         cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
1012                         &dma_handle, GFP_KERNEL | __GFP_ZERO);
1013         if (!cpu_addr) {
1014                 dev_err(hdev->dev,
1015                         "Failed to allocate %zu of dma memory for TPC kernel\n",
1016                         fw_size);
1017                 rc = -ENOMEM;
1018                 goto out;
1019         }
1020
1021         memcpy(cpu_addr, fw->data, fw_size);
1022
1023         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1024
1025         hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
1026                         dma_handle);
1027
1028 out:
1029         release_firmware(fw);
1030         return rc;
1031 }
1032
1033 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1034 {
1035         struct gaudi_device *gaudi = hdev->asic_specific;
1036         struct gaudi_collective_properties *prop = &gaudi->collective_props;
1037         struct hl_hw_queue *q;
1038         u32 i, sob_id, sob_group_id, queue_id;
1039
1040         /* Iterate through SOB groups and assign a SOB for each slave queue */
1041         sob_group_id =
1042                 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1043         sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1044
1045         queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1046         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1047                 q = &hdev->kernel_queues[queue_id + (4 * i)];
1048                 q->sync_stream_prop.collective_sob_id = sob_id + i;
1049         }
1050
1051         /* Both DMA5 and TPC7 use the same resources since only a single
1052          * engine needs to participate in the reduction process
1053          */
1054         queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1055         q = &hdev->kernel_queues[queue_id];
1056         q->sync_stream_prop.collective_sob_id =
1057                         sob_id + NIC_NUMBER_OF_ENGINES;
1058
1059         queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1060         q = &hdev->kernel_queues[queue_id];
1061         q->sync_stream_prop.collective_sob_id =
1062                         sob_id + NIC_NUMBER_OF_ENGINES;
1063 }
1064
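/* kref release callback: clear all SOBs in the group and re-init the kref
 * so the group can be reused.
 */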
1065 static void gaudi_sob_group_hw_reset(struct kref *ref)
1066 {
1067         struct gaudi_hw_sob_group *hw_sob_group =
1068                 container_of(ref, struct gaudi_hw_sob_group, kref);
1069         struct hl_device *hdev = hw_sob_group->hdev;
1070         int i;
1071
1072         for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1073                 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1074                         (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1075
1076         kref_init(&hw_sob_group->kref);
1077 }
1078
1079 static void gaudi_sob_group_reset_error(struct kref *ref)
1080 {
1081         struct gaudi_hw_sob_group *hw_sob_group =
1082                 container_of(ref, struct gaudi_hw_sob_group, kref);
1083         struct hl_device *hdev = hw_sob_group->hdev;
1084
1085         dev_crit(hdev->dev,
1086                 "SOB release shouldn't be called here, base_sob_id: %d\n",
1087                 hw_sob_group->base_sob_id);
1088 }
1089
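/* Build the master SOB mask: one bit per enabled NIC engine plus one bit
 * for the collective (reduction) engine.
 */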
1090 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1091 {
1092         struct gaudi_collective_properties *prop;
1093         int i;
1094
1095         prop = &gaudi->collective_props;
1096
1097         memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1098
1099         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1100                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1101                         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1102                                         BIT(i % HL_MAX_SOBS_PER_MONITOR);
1103         /* Set collective engine bit */
1104         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1105                                 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1106 }
1107
1108 static int gaudi_collective_init(struct hl_device *hdev)
1109 {
1110         u32 i, sob_id, reserved_sobs_per_group;
1111         struct gaudi_collective_properties *prop;
1112         struct gaudi_device *gaudi;
1113
1114         gaudi = hdev->asic_specific;
1115         prop = &gaudi->collective_props;
1116         sob_id = hdev->asic_prop.collective_first_sob;
1117
1118         /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1119         reserved_sobs_per_group =
1120                 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1121
1122         /* Init SOB groups */
1123         for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1124                 prop->hw_sob_group[i].hdev = hdev;
1125                 prop->hw_sob_group[i].base_sob_id = sob_id;
1126                 sob_id += reserved_sobs_per_group;
1127                 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1128         }
1129
1130         for (i = 0 ; i < QMAN_STREAMS; i++) {
1131                 prop->next_sob_group_val[i] = 1;
1132                 prop->curr_sob_group_idx[i] = 0;
1133                 gaudi_collective_map_sobs(hdev, i);
1134         }
1135
1136         gaudi_collective_mstr_sob_mask_set(gaudi);
1137
1138         return 0;
1139 }
1140
1141 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1142 {
1143         struct gaudi_device *gaudi = hdev->asic_specific;
1144         struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1145
1146         kref_put(&cprop->hw_sob_group[sob_group].kref,
1147                                         gaudi_sob_group_hw_reset);
1148 }
1149
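/* Build the wait part of the collective master CB: each of the two master
 * monitors waits on its part of the SOB group to reach the expected value
 * for this stream.
 */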
1150 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1151                 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1152 {
1153         u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1154         struct gaudi_collective_properties *cprop;
1155         struct hl_gen_wait_properties wait_prop;
1156         struct hl_sync_stream_properties *prop;
1157         struct gaudi_device *gaudi;
1158
1159         gaudi = hdev->asic_specific;
1160         cprop = &gaudi->collective_props;
1161         queue_id = job->hw_queue_id;
1162         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1163
1164         master_sob_base =
1165                 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1166         master_monitor = prop->collective_mstr_mon_id[0];
1167
1168         cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1169
1170         dev_dbg(hdev->dev,
1171                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1172                 master_sob_base, cprop->mstr_sob_mask[0],
1173                 cprop->next_sob_group_val[stream],
1174                 master_monitor, queue_id);
1175
1176         wait_prop.data = (void *) job->patched_cb;
1177         wait_prop.sob_base = master_sob_base;
1178         wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1179         wait_prop.sob_val = cprop->next_sob_group_val[stream];
1180         wait_prop.mon_id = master_monitor;
1181         wait_prop.q_idx = queue_id;
1182         wait_prop.size = cb_size;
1183         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1184
1185         master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1186         master_monitor = prop->collective_mstr_mon_id[1];
1187
1188         dev_dbg(hdev->dev,
1189                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1190                 master_sob_base, cprop->mstr_sob_mask[1],
1191                 cprop->next_sob_group_val[stream],
1192                 master_monitor, queue_id);
1193
1194         wait_prop.sob_base = master_sob_base;
1195         wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1196         wait_prop.mon_id = master_monitor;
1197         wait_prop.size = cb_size;
1198         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1199 }
1200
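/* Build the collective slave CB: wait on the signal SOB using the slave
 * monitor, then signal this queue's collective SOB.
 */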
1201 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1202                 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1203 {
1204         struct hl_gen_wait_properties wait_prop;
1205         struct hl_sync_stream_properties *prop;
1206         u32 queue_id, cb_size = 0;
1207
1208         queue_id = job->hw_queue_id;
1209         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1210
1211         if (job->cs->encaps_signals) {
1212                 /* use the encaps signal handle stored earlier in the flow
1213                  * and set the SOB information from the encaps
1214                  * signals handle
1215                  */
1216                 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1217                                                 cs_cmpl);
1218
1219                 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1220                                 job->cs->sequence,
1221                                 cs_cmpl->hw_sob->sob_id,
1222                                 cs_cmpl->sob_val);
1223         }
1224
1225         /* Add to wait CBs using slave monitor */
1226         wait_prop.data = (void *) job->user_cb;
1227         wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1228         wait_prop.sob_mask = 0x1;
1229         wait_prop.sob_val = cs_cmpl->sob_val;
1230         wait_prop.mon_id = prop->collective_slave_mon_id;
1231         wait_prop.q_idx = queue_id;
1232         wait_prop.size = cb_size;
1233
1234         dev_dbg(hdev->dev,
1235                 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1236                 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1237                 prop->collective_slave_mon_id, queue_id);
1238
1239         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1240
1241         dev_dbg(hdev->dev,
1242                 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1243                 prop->collective_sob_id, queue_id);
1244
1245         cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1246                         prop->collective_sob_id, cb_size, false);
1247 }
1248
1249 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1250 {
1251         struct hl_cs_compl *signal_cs_cmpl =
1252                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1253         struct hl_cs_compl *cs_cmpl =
1254                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1255         struct gaudi_collective_properties *cprop;
1256         u32 stream, queue_id, sob_group_offset;
1257         struct gaudi_device *gaudi;
1258         struct hl_device *hdev;
1259         struct hl_cs_job *job;
1260         struct hl_ctx *ctx;
1261
1262         ctx = cs->ctx;
1263         hdev = ctx->hdev;
1264         gaudi = hdev->asic_specific;
1265         cprop = &gaudi->collective_props;
1266
1267         /* In the encaps signals case the SOB info will be retrieved from
1268          * the handle in gaudi_collective_slave_init_job.
1269          */
1270         if (!cs->encaps_signals) {
1271                 /* copy the SOB id and value of the signal CS */
1272                 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1273                 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1274         }
1275
1276         /* Check again if the signal cs has already completed.
1277          * If yes, don't send any wait cs since the hw_sob
1278          * could already be in reset. If the signal has not completed,
1279          * take a refcount on the hw_sob to prevent resetting the sob
1280          * while the wait cs is not yet submitted.
1281          * Note that this check is protected by two locks,
1282          * the hw queue lock and the completion object lock,
1283          * and the same completion object lock also protects
1284          * the hw_sob reset handler function.
1285          * The hw_queue lock prevents the hw_sob refcount value,
1286          * which is changed by the signal/wait flows, from going out of sync.
1287          */
1288         spin_lock(&signal_cs_cmpl->lock);
1289
1290         if (completion_done(&cs->signal_fence->completion)) {
1291                 spin_unlock(&signal_cs_cmpl->lock);
1292                 return -EINVAL;
1293         }
1294         /* Increment kref since all slave queues are now waiting on it */
1295         kref_get(&cs_cmpl->hw_sob->kref);
1296
1297         spin_unlock(&signal_cs_cmpl->lock);
1298
1299         /* Calculate the stream from the collective master queue (1st job) */
1300         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1301         stream = job->hw_queue_id % 4;
1302         sob_group_offset =
1303                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1304
1305         list_for_each_entry(job, &cs->job_list, cs_node) {
1306                 queue_id = job->hw_queue_id;
1307
1308                 if (hdev->kernel_queues[queue_id].collective_mode ==
1309                                 HL_COLLECTIVE_MASTER)
1310                         gaudi_collective_master_init_job(hdev, job, stream,
1311                                                 sob_group_offset);
1312                 else
1313                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1314         }
1315
1316         cs_cmpl->sob_group = sob_group_offset;
1317
1318         /* Handle sob group kref and wraparound */
1319         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1320         cprop->next_sob_group_val[stream]++;
1321
1322         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1323                 /*
1324                  * Decrement as we reached the max value.
1325                  * The release function won't be called here as we've
1326                  * just incremented the refcount.
1327                  */
1328                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1329                                 gaudi_sob_group_reset_error);
1330                 cprop->next_sob_group_val[stream] = 1;
1331                 /* only two SOBs are currently in use */
1332                 cprop->curr_sob_group_idx[stream] =
1333                         (cprop->curr_sob_group_idx[stream] + 1) &
1334                                                         (HL_RSVD_SOBS - 1);
1335
1336                 gaudi_collective_map_sobs(hdev, stream);
1337
1338                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1339                                 cprop->curr_sob_group_idx[stream], stream);
1340         }
1341
1342         mb();
1343         hl_fence_put(cs->signal_fence);
1344         cs->signal_fence = NULL;
1345
1346         return 0;
1347 }
1348
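/*
 * gaudi_collective_wait_create_job() - allocate one job of a collective wait CS.
 *
 * Allocates a job and a kernel CB sized for either the master queue (two
 * monitor configurations, two fences and two completion packets) or a slave
 * queue (one monitor configuration, one fence and one SOB signal), and adds
 * the job to the CS job list. The packets themselves are generated by the
 * master/slave init_job helpers called from gaudi_collective_wait_init_cs().
 */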
1349 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1350                 struct hl_ctx *ctx, struct hl_cs *cs,
1351                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1352                 u32 encaps_signal_offset)
1353 {
1354         struct hw_queue_properties *hw_queue_prop;
1355         struct hl_cs_counters_atomic *cntr;
1356         struct hl_cs_job *job;
1357         struct hl_cb *cb;
1358         u32 cb_size;
1359         bool patched_cb;
1360
1361         cntr = &hdev->aggregated_cs_counters;
1362
1363         if (mode == HL_COLLECTIVE_MASTER) {
1364                 /* The CB of the collective master queue contains
1365                  * 4 msg short packets for monitor 1 configuration
1366                  * 1 fence packet
1367                  * 4 msg short packets for monitor 2 configuration
1368                  * 1 fence packet
1369                  * 2 msg prot packets for completion and MSI-X
1370                  */
1371                 cb_size = sizeof(struct packet_msg_short) * 8 +
1372                                 sizeof(struct packet_fence) * 2 +
1373                                 sizeof(struct packet_msg_prot) * 2;
1374                 patched_cb = true;
1375         } else {
1376                 /* The CB of a collective slave queue contains
1377                  * 4 msg short packets for monitor configuration
1378                  * 1 fence packet
1379                  * 1 additional msg short packet for sob signal
1380                  */
1381                 cb_size = sizeof(struct packet_msg_short) * 5 +
1382                                 sizeof(struct packet_fence);
1383                 patched_cb = false;
1384         }
1385
1386         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1387         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1388         if (!job) {
1389                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1390                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1391                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1392                 return -ENOMEM;
1393         }
1394
1395         /* Allocate internal mapped CB for non patched CBs */
1396         cb = hl_cb_kernel_create(hdev, cb_size,
1397                         hdev->mmu_enable && !patched_cb);
1398         if (!cb) {
1399                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1400                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1401                 kfree(job);
1402                 return -EFAULT;
1403         }
1404
1405         job->id = 0;
1406         job->cs = cs;
1407         job->user_cb = cb;
1408         atomic_inc(&job->user_cb->cs_cnt);
1409         job->user_cb_size = cb_size;
1410         job->hw_queue_id = queue_id;
1411
1412         /* Since it is guaranteed to have only one chunk in the collective wait
1413          * CS, we can use this chunk to set the encapsulated signal offset
1414          * in the jobs.
1415          */
1416         if (cs->encaps_signals)
1417                 job->encaps_sig_wait_offset = encaps_signal_offset;
1418
1419         /*
1420          * No need for parsing, the user CB is the patched CB.
1421          * We call hl_cb_destroy() for two reasons: we don't need
1422          * the CB in the CB idr anymore, and we need to decrement its
1423          * refcount as it was incremented inside hl_cb_kernel_create().
1424          */
1425         if (patched_cb)
1426                 job->patched_cb = job->user_cb;
1427         else
1428                 job->patched_cb = NULL;
1429
1430         job->job_cb_size = job->user_cb_size;
1431         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1432
1433         /* Increment refcount, as we get a completion for external queues */
1434         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1435                 cs_get(cs);
1436
1437         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1438
1439         list_add_tail(&job->cs_node, &cs->job_list);
1440
1441         hl_debugfs_add_job(hdev, job);
1442
1443         return 0;
1444 }
1445
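/*
 * gaudi_collective_wait_create_jobs() - create all jobs of a collective wait CS.
 *
 * Creates one master job on the wait queue, followed by a slave job per
 * enabled NIC engine and one on the reduction engine (DMA5 or TPC7), all on
 * the same stream as the wait queue. NIC engines that were not initialized
 * are skipped.
 */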
1446 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1447                 struct hl_ctx *ctx, struct hl_cs *cs,
1448                 u32 wait_queue_id, u32 collective_engine_id,
1449                 u32 encaps_signal_offset)
1450 {
1451         struct gaudi_device *gaudi = hdev->asic_specific;
1452         struct hw_queue_properties *hw_queue_prop;
1453         u32 queue_id, collective_queue, num_jobs;
1454         u32 stream, nic_queue, nic_idx = 0;
1455         bool skip;
1456         int i, rc = 0;
1457
1458         /* Verify wait queue id is configured as master */
1459         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1460         if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
1461                 dev_err(hdev->dev,
1462                         "Queue %d is not configured as collective master\n",
1463                         wait_queue_id);
1464                 return -EINVAL;
1465         }
1466
1467         /* Verify engine id is supported */
1468         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1469                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1470                 dev_err(hdev->dev,
1471                         "Collective wait does not support engine %u\n",
1472                         collective_engine_id);
1473                 return -EINVAL;
1474         }
1475
1476         stream = wait_queue_id % 4;
1477
1478         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1479                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1480         else
1481                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1482
1483         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1484         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1485
1486         /* The first job goes to the collective master queue; it will wait
1487          * for the collective slave queues to finish execution.
1488          * The synchronization is done using two monitors:
1489          * the first monitor for NICs 0-7, the second monitor for NICs 8-9
1490          * and the reduction engine (DMA5/TPC7).
1491          *
1492          * The rest of the jobs go to the collective slave queues, which
1493          * will all wait for the user to signal sob 'cs_cmpl->sob_val'.
1494          */
1495         for (i = 0 ; i < num_jobs ; i++) {
1496                 if (i == 0) {
1497                         queue_id = wait_queue_id;
1498                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1499                                 HL_COLLECTIVE_MASTER, queue_id,
1500                                 wait_queue_id, encaps_signal_offset);
1501                 } else {
1502                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1503                                 if (gaudi->hw_cap_initialized &
1504                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1505                                         skip = false;
1506                                 else
1507                                         skip = true;
1508
1509                                 queue_id = nic_queue;
1510                                 nic_queue += 4;
1511                                 nic_idx++;
1512
1513                                 if (skip)
1514                                         continue;
1515                         } else {
1516                                 queue_id = collective_queue;
1517                         }
1518
1519                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1520                                 HL_COLLECTIVE_SLAVE, queue_id,
1521                                 wait_queue_id, encaps_signal_offset);
1522                 }
1523
1524                 if (rc)
1525                         return rc;
1526         }
1527
1528         return rc;
1529 }
1530
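/*
 * gaudi_late_init() - late device initialization that requires the device CPU.
 *
 * Fetches cpucp info, stops the unused NIC QMANs on PCI cards (ports 0-1),
 * enables PCI access from the device CPU, scrubs SRAM and DRAM, clears the
 * MMU page tables range, initializes the TPC memories and the collective
 * infrastructure, and prepares the MMU for the single supported user ASID.
 */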
1531 static int gaudi_late_init(struct hl_device *hdev)
1532 {
1533         struct gaudi_device *gaudi = hdev->asic_specific;
1534         int rc;
1535
1536         rc = gaudi->cpucp_info_get(hdev);
1537         if (rc) {
1538                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1539                 return rc;
1540         }
1541
1542         if ((hdev->card_type == cpucp_card_type_pci) &&
1543                         (hdev->nic_ports_mask & 0x3)) {
1544                 dev_info(hdev->dev,
1545                         "PCI card detected, only 8 ports are enabled\n");
1546                 hdev->nic_ports_mask &= ~0x3;
1547
1548                 /* Stop and disable unused NIC QMANs */
1549                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1550                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1551                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1552
1553                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1554                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1555                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1556
1557                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1558                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1559
1560                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1561         }
1562
1563         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1564         if (rc) {
1565                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1566                 return rc;
1567         }
1568
1569         /* Scrub both SRAM and DRAM */
1570         rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
1571         if (rc)
1572                 goto disable_pci_access;
1573
1574         rc = gaudi_fetch_psoc_frequency(hdev);
1575         if (rc) {
1576                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1577                 goto disable_pci_access;
1578         }
1579
1580         rc = gaudi_mmu_clear_pgt_range(hdev);
1581         if (rc) {
1582                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1583                 goto disable_pci_access;
1584         }
1585
1586         rc = gaudi_init_tpc_mem(hdev);
1587         if (rc) {
1588                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1589                 goto disable_pci_access;
1590         }
1591
1592         rc = gaudi_collective_init(hdev);
1593         if (rc) {
1594                 dev_err(hdev->dev, "Failed to init collective\n");
1595                 goto disable_pci_access;
1596         }
1597
1598         /* We only support a single ASID for the user, so for the sake of optimization, just
1599          * initialize the ASID one time during device initialization with the fixed value of 1
1600          */
1601         gaudi_mmu_prepare(hdev, 1);
1602
1603         return 0;
1604
1605 disable_pci_access:
1606         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1607
1608         return rc;
1609 }
1610
1611 static void gaudi_late_fini(struct hl_device *hdev)
1612 {
1613         const struct hwmon_channel_info **channel_info_arr;
1614         int i = 0;
1615
1616         if (!hdev->hl_chip_info->info)
1617                 return;
1618
1619         channel_info_arr = hdev->hl_chip_info->info;
1620
1621         while (channel_info_arr[i]) {
1622                 kfree(channel_info_arr[i]->config);
1623                 kfree(channel_info_arr[i]);
1624                 i++;
1625         }
1626
1627         kfree(channel_info_arr);
1628
1629         hdev->hl_chip_info->info = NULL;
1630 }
1631
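/*
 * gaudi_alloc_cpu_accessible_dma_mem() - allocate the CPU accessible memory area.
 *
 * Retries the allocation up to GAUDI_ALLOC_CPU_MEM_RETRY_CNT times until it
 * gets a chunk whose start and end addresses share the same PCI MSB bits,
 * then frees any rejected attempts. See the comment inside for the 40/50-bit
 * addressing constraint that makes this necessary.
 */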
1632 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1633 {
1634         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1635         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1636         int i, j, rc = 0;
1637
1638         /*
1639          * The device CPU works with 40-bit addresses, and bit 39 must be set
1640          * to '1' when accessing the host.
1641          * Bits 49:39 of the full host address are saved for a later
1642          * configuration of the HW, which extends the address to 50 bits.
1643          * Because there is a single HW register that holds the extension bits,
1644          * these bits must be identical across the entire allocated range.
1645          */
1646
1647         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1648                 virt_addr_arr[i] =
1649                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1650                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1651                                                 &dma_addr_arr[i],
1652                                                 GFP_KERNEL | __GFP_ZERO);
1653                 if (!virt_addr_arr[i]) {
1654                         rc = -ENOMEM;
1655                         goto free_dma_mem_arr;
1656                 }
1657
1658                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1659                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1660                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1661                         break;
1662         }
1663
1664         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1665                 dev_err(hdev->dev,
1666                         "MSB of CPU accessible DMA memory is not identical across the entire range\n");
1667                 rc = -EFAULT;
1668                 goto free_dma_mem_arr;
1669         }
1670
1671         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1672         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1673         hdev->cpu_pci_msb_addr =
1674                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1675
1676         if (!hdev->asic_prop.fw_security_enabled)
1677                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1678
1679 free_dma_mem_arr:
1680         for (j = 0 ; j < i ; j++)
1681                 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1682                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1683                                                 virt_addr_arr[j],
1684                                                 dma_addr_arr[j]);
1685
1686         return rc;
1687 }
1688
1689 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1690 {
1691         struct gaudi_device *gaudi = hdev->asic_specific;
1692         struct gaudi_internal_qman_info *q;
1693         u32 i;
1694
1695         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1696                 q = &gaudi->internal_qmans[i];
1697                 if (!q->pq_kernel_addr)
1698                         continue;
1699                 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1700                                                         q->pq_kernel_addr,
1701                                                         q->pq_dma_addr);
1702         }
1703 }
1704
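/*
 * gaudi_alloc_internal_qmans_pq_mem() - allocate PQ buffers for internal queues.
 *
 * Allocates a coherent PQ buffer for every internal (on-device) queue, sized
 * per engine type (HBM DMA, MME, TPC or NIC). On failure, everything that was
 * already allocated is freed.
 */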
1705 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1706 {
1707         struct gaudi_device *gaudi = hdev->asic_specific;
1708         struct gaudi_internal_qman_info *q;
1709         int rc, i;
1710
1711         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1712                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1713                         continue;
1714
1715                 q = &gaudi->internal_qmans[i];
1716
1717                 switch (i) {
1718                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1719                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1720                         break;
1721                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1722                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1723                         break;
1724                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1725                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1726                         break;
1727                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1728                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1729                         break;
1730                 default:
1731                         dev_err(hdev->dev, "Bad internal queue index %d", i);
1732                         rc = -EINVAL;
1733                         goto free_internal_qmans_pq_mem;
1734                 }
1735
1736                 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1737                                                 hdev, q->pq_size,
1738                                                 &q->pq_dma_addr,
1739                                                 GFP_KERNEL | __GFP_ZERO);
1740                 if (!q->pq_kernel_addr) {
1741                         rc = -ENOMEM;
1742                         goto free_internal_qmans_pq_mem;
1743                 }
1744         }
1745
1746         return 0;
1747
1748 free_internal_qmans_pq_mem:
1749         gaudi_free_internal_qmans_pq_mem(hdev);
1750         return rc;
1751 }
1752
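/*
 * gaudi_set_pci_memory_regions() - describe the device regions exposed via PCI BARs.
 *
 * Fills the pci_mem_region table with the CFG, SRAM, DRAM and SP SRAM regions,
 * including each region's base, size, BAR id and offset inside the BAR.
 */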
1753 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1754 {
1755         struct asic_fixed_properties *prop = &hdev->asic_prop;
1756         struct pci_mem_region *region;
1757
1758         /* CFG */
1759         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1760         region->region_base = CFG_BASE;
1761         region->region_size = CFG_SIZE;
1762         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1763         region->bar_size = CFG_BAR_SIZE;
1764         region->bar_id = CFG_BAR_ID;
1765         region->used = 1;
1766
1767         /* SRAM */
1768         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1769         region->region_base = SRAM_BASE_ADDR;
1770         region->region_size = SRAM_SIZE;
1771         region->offset_in_bar = 0;
1772         region->bar_size = SRAM_BAR_SIZE;
1773         region->bar_id = SRAM_BAR_ID;
1774         region->used = 1;
1775
1776         /* DRAM */
1777         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1778         region->region_base = DRAM_PHYS_BASE;
1779         region->region_size = hdev->asic_prop.dram_size;
1780         region->offset_in_bar = 0;
1781         region->bar_size = prop->dram_pci_bar_size;
1782         region->bar_id = HBM_BAR_ID;
1783         region->used = 1;
1784
1785         /* SP SRAM */
1786         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1787         region->region_base = PSOC_SCRATCHPAD_ADDR;
1788         region->region_size = PSOC_SCRATCHPAD_SIZE;
1789         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1790         region->bar_size = CFG_BAR_SIZE;
1791         region->bar_id = CFG_BAR_ID;
1792         region->used = 1;
1793 }
1794
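/*
 * gaudi_sw_init() - software-only initialization, no hardware access.
 *
 * Allocates the ASIC-specific structure, builds the event id mapping from the
 * IRQ map table, and creates the DMA pool, the CPU accessible memory area and
 * the internal QMAN PQ buffers.
 */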
1795 static int gaudi_sw_init(struct hl_device *hdev)
1796 {
1797         struct gaudi_device *gaudi;
1798         u32 i, event_id = 0;
1799         int rc;
1800
1801         /* Allocate device structure */
1802         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1803         if (!gaudi)
1804                 return -ENOMEM;
1805
1806         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1807                 if (gaudi_irq_map_table[i].valid) {
1808                         if (event_id == GAUDI_EVENT_SIZE) {
1809                                 dev_err(hdev->dev,
1810                                         "Event array exceeds the limit of %u events\n",
1811                                         GAUDI_EVENT_SIZE);
1812                                 rc = -EINVAL;
1813                                 goto free_gaudi_device;
1814                         }
1815
1816                         gaudi->events[event_id++] =
1817                                         gaudi_irq_map_table[i].fc_id;
1818                 }
1819         }
1820
1821         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1822
1823         gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1824
1825         hdev->asic_specific = gaudi;
1826
1827         /* Create DMA pool for small allocations */
1828         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1829                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1830         if (!hdev->dma_pool) {
1831                 dev_err(hdev->dev, "failed to create DMA pool\n");
1832                 rc = -ENOMEM;
1833                 goto free_gaudi_device;
1834         }
1835
1836         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1837         if (rc)
1838                 goto free_dma_pool;
1839
1840         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1841         if (!hdev->cpu_accessible_dma_pool) {
1842                 dev_err(hdev->dev,
1843                         "Failed to create CPU accessible DMA pool\n");
1844                 rc = -ENOMEM;
1845                 goto free_cpu_dma_mem;
1846         }
1847
1848         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1849                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1850                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1851         if (rc) {
1852                 dev_err(hdev->dev,
1853                         "Failed to add memory to CPU accessible DMA pool\n");
1854                 rc = -EFAULT;
1855                 goto free_cpu_accessible_dma_pool;
1856         }
1857
1858         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1859         if (rc)
1860                 goto free_cpu_accessible_dma_pool;
1861
1862         spin_lock_init(&gaudi->hw_queues_lock);
1863         mutex_init(&gaudi->clk_gate_mutex);
1864
1865         hdev->supports_sync_stream = true;
1866         hdev->supports_coresight = true;
1867         hdev->supports_staged_submission = true;
1868         hdev->supports_wait_for_multi_cs = true;
1869
1870         hdev->asic_funcs->set_pci_memory_regions(hdev);
1871
1872         return 0;
1873
1874 free_cpu_accessible_dma_pool:
1875         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1876 free_cpu_dma_mem:
1877         if (!hdev->asic_prop.fw_security_enabled)
1878                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1879                                         hdev->cpu_pci_msb_addr);
1880         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1881                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1882                         hdev->cpu_accessible_dma_mem,
1883                         hdev->cpu_accessible_dma_address);
1884 free_dma_pool:
1885         dma_pool_destroy(hdev->dma_pool);
1886 free_gaudi_device:
1887         kfree(gaudi);
1888         return rc;
1889 }
1890
1891 static int gaudi_sw_fini(struct hl_device *hdev)
1892 {
1893         struct gaudi_device *gaudi = hdev->asic_specific;
1894
1895         gaudi_free_internal_qmans_pq_mem(hdev);
1896
1897         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1898
1899         if (!hdev->asic_prop.fw_security_enabled)
1900                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1901                                         hdev->cpu_pci_msb_addr);
1902
1903         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1904                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1905                         hdev->cpu_accessible_dma_mem,
1906                         hdev->cpu_accessible_dma_address);
1907
1908         dma_pool_destroy(hdev->dma_pool);
1909
1910         mutex_destroy(&gaudi->clk_gate_mutex);
1911
1912         kfree(gaudi);
1913
1914         return 0;
1915 }
1916
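/*
 * gaudi_irq_handler_single() - interrupt handler for single MSI mode.
 *
 * With a single MSI vector, one interrupt services all completion queues and
 * the event queue, so all of them are checked on every invocation.
 */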
1917 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1918 {
1919         struct hl_device *hdev = arg;
1920         int i;
1921
1922         if (hdev->disabled)
1923                 return IRQ_HANDLED;
1924
1925         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1926                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1927
1928         hl_irq_handler_eq(irq, &hdev->event_queue);
1929
1930         return IRQ_HANDLED;
1931 }
1932
1933 /*
1934  * For backward compatibility, new MSI interrupts should be set after the
1935  * existing CPU and NIC interrupts.
1936  */
1937 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1938                                 bool cpu_eq)
1939 {
1940         int msi_vec;
1941
1942         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1943                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1944                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1945
1946         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1947                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1948
1949         return pci_irq_vector(hdev->pdev, msi_vec);
1950 }
1951
1952 static int gaudi_enable_msi_single(struct hl_device *hdev)
1953 {
1954         int rc, irq;
1955
1956         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1957
1958         irq = gaudi_pci_irq_vector(hdev, 0, false);
1959         rc = request_irq(irq, gaudi_irq_handler_single, 0,
1960                         "gaudi single msi", hdev);
1961         if (rc)
1962                 dev_err(hdev->dev,
1963                         "Failed to request single MSI IRQ\n");
1964
1965         return rc;
1966 }
1967
1968 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1969 {
1970         int cq_cnt = hdev->asic_prop.completion_queues_count;
1971         int rc, i, irq_cnt_init, irq;
1972
1973         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1974                 irq = gaudi_pci_irq_vector(hdev, i, false);
1975                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1976                                 &hdev->completion_queue[i]);
1977                 if (rc) {
1978                         dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1979                         goto free_irqs;
1980                 }
1981         }
1982
1983         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1984         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1985                                 &hdev->event_queue);
1986         if (rc) {
1987                 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1988                 goto free_irqs;
1989         }
1990
1991         return 0;
1992
1993 free_irqs:
1994         for (i = 0 ; i < irq_cnt_init ; i++)
1995                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1996                                 &hdev->completion_queue[i]);
1997         return rc;
1998 }
1999
2000 static int gaudi_enable_msi(struct hl_device *hdev)
2001 {
2002         struct gaudi_device *gaudi = hdev->asic_specific;
2003         int rc;
2004
2005         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2006                 return 0;
2007
2008         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2009         if (rc < 0) {
2010                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2011                 return rc;
2012         }
2013
2014         if (rc < NUMBER_OF_INTERRUPTS) {
2015                 gaudi->multi_msi_mode = false;
2016                 rc = gaudi_enable_msi_single(hdev);
2017         } else {
2018                 gaudi->multi_msi_mode = true;
2019                 rc = gaudi_enable_msi_multi(hdev);
2020         }
2021
2022         if (rc)
2023                 goto free_pci_irq_vectors;
2024
2025         gaudi->hw_cap_initialized |= HW_CAP_MSI;
2026
2027         return 0;
2028
2029 free_pci_irq_vectors:
2030         pci_free_irq_vectors(hdev->pdev);
2031         return rc;
2032 }
2033
2034 static void gaudi_sync_irqs(struct hl_device *hdev)
2035 {
2036         struct gaudi_device *gaudi = hdev->asic_specific;
2037         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
2038
2039         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2040                 return;
2041
2042         /* Wait for all pending IRQs to be finished */
2043         if (gaudi->multi_msi_mode) {
2044                 for (i = 0 ; i < cq_cnt ; i++)
2045                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
2046
2047                 synchronize_irq(gaudi_pci_irq_vector(hdev,
2048                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
2049                                                 true));
2050         } else {
2051                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2052         }
2053 }
2054
2055 static void gaudi_disable_msi(struct hl_device *hdev)
2056 {
2057         struct gaudi_device *gaudi = hdev->asic_specific;
2058         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
2059
2060         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2061                 return;
2062
2063         gaudi_sync_irqs(hdev);
2064
2065         if (gaudi->multi_msi_mode) {
2066                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
2067                                                 true);
2068                 free_irq(irq, &hdev->event_queue);
2069
2070                 for (i = 0 ; i < cq_cnt ; i++) {
2071                         irq = gaudi_pci_irq_vector(hdev, i, false);
2072                         free_irq(irq, &hdev->completion_queue[i]);
2073                 }
2074         } else {
2075                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2076         }
2077
2078         pci_free_irq_vectors(hdev->pdev);
2079
2080         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2081 }
2082
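/*
 * gaudi_init_scrambler_sram() - enable the SRAM scrambler in all routers.
 *
 * Skipped when the firmware is secured, when the firmware already enabled it
 * (as reported in the boot device status), when the driver already enabled it,
 * or when SRAM scrambling is disabled in the device structure.
 */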
2083 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2084 {
2085         struct gaudi_device *gaudi = hdev->asic_specific;
2086
2087         if (hdev->asic_prop.fw_security_enabled)
2088                 return;
2089
2090         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2091                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2092                 return;
2093
2094         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2095                 return;
2096
2097         if (!hdev->sram_scrambler_enable)
2098                 return;
2099
2100         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2101                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2102         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2103                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2104         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2105                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2106         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2107                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2108         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2109                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2110         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2111                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2112         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2113                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2114         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2115                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2116
2117         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2118                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2119         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2120                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2121         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2122                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2123         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2124                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2125         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2126                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2127         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2128                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2129         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2130                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2131         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2132                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2133
2134         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2135                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2136         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2137                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2138         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2139                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2140         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2141                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2142         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2143                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2144         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2145                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2146         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2147                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2148         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2149                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2150
2151         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2152 }
2153
2154 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2155 {
2156         struct gaudi_device *gaudi = hdev->asic_specific;
2157
2158         if (hdev->asic_prop.fw_security_enabled)
2159                 return;
2160
2161         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2162                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2163                 return;
2164
2165         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2166                 return;
2167
2168         if (!hdev->dram_scrambler_enable)
2169                 return;
2170
2171         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2172                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2173         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2174                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2175         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2176                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2177         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2178                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2179         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2180                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2181         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2182                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2183         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2184                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2185         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2186                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2187
2188         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2189                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2190         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2191                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2192         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2193                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2194         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2195                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2196         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2197                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2198         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2199                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2200         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2201                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2202         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2203                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2204
2205         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2206                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2207         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2208                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2209         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2210                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2211         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2212                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2213         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2214                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2215         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2216                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2217         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2218                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2219         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2220                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2221
2222         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2223 }
2224
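/*
 * gaudi_init_e2e() - configure end-to-end credits in the routers and DMA IFs.
 *
 * Skipped when the firmware is secured or has already configured the credits
 * (as reported in the bootfit device status). Programs per-router HBM and PCI
 * read/write sizes, adjusts the NL_HBM selection registers when DRAM
 * scrambling is disabled, and finally enables the E2E HBM and PCI paths in
 * every router and DMA IF channel.
 */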
2225 static void gaudi_init_e2e(struct hl_device *hdev)
2226 {
2227         if (hdev->asic_prop.fw_security_enabled)
2228                 return;
2229
2230         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2231                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2232                 return;
2233
2234         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2235         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2236         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2237         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2238
2239         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2240         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2241         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2242         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2243
2244         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2245         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2246         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2247         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2248
2249         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2250         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2251         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2252         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2253
2254         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2255         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2256         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2257         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2258
2259         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2260         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2261         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2262         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2263
2264         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2265         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2266         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2267         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2268
2269         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2270         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2271         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2272         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2273
2274         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2275         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2276         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2277         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2278
2279         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2280         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2281         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2282         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2283
2284         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2285         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2286         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2287         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2288
2289         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2290         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2291         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2292         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2293
2294         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2295         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2296         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2297         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2298
2299         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2300         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2301         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2302         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2303
2304         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2305         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2306         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2307         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2308
2309         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2310         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2311         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2312         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2313
2314         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2315         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2316         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2317         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2318
2319         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2320         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2321         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2322         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2323
2324         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2325         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2326         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2327         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2328
2329         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2330         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2331         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2332         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2333
2334         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2335         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2336         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2337         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2338
2339         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2340         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2341         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2342         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2343
2344         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2345         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2346         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2347         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2348
2349         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2350         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2351         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2352         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2353
2354         if (!hdev->dram_scrambler_enable) {
2355                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2356                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2357                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2358                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2359
2360                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2361                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2362                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2363                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2364
2365                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2366                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2367                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2368                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2369
2370                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2371                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2372                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2373                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2374
2375                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2376                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2377                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2378                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2379
2380                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2381                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2382                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2383                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2384
2385                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2386                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2387                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2388                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2389
2390                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2391                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2392                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2393                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2394
2395                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2396                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2397                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2398                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2399
2400                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2401                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2402                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2403                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2404
2405                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2406                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2407                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2408                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2409
2410                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2411                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2412                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2413                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2414
2415                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2416                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2417                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2418                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2419
2420                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2421                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2422                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2423                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2424
2425                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2426                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2427                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2428                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2429
2430                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2431                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2432                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2433                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2434
2435                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2436                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2437                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2438                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2439
2440                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2441                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2442                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2443                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2444
2445                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2446                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2447                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2448                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2449
2450                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2451                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2452                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2453                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2454
2455                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2456                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2457                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2458                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2459
2460                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2461                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2462                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2463                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2464
2465                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2466                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2467                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2468                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2469
2470                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2471                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2472                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2473                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2474         }
2475
2476         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2477                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2478         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2479                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2480
2481         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2482                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2483         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2484                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2485
2486         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2487                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2488         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2489                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2490
2491         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2492                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2493         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2494                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2495
2496         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2497                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2498         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2499                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2500
2501         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2502                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2503         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2504                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2505
2506         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2507                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2508         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2509                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2510
2511         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2512                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2513         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2514                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2515
2516         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2517                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2518         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2519                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2520
2521         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2522                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2523         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2524                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2525
2526         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2527                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2528         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2529                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2530
2531         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2532                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2533         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2534                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2535
2536         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2537                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2538         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2539                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2540
2541         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2542                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2543         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2544                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2545
2546         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2547                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2548         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2549                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2550
2551         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2552                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2553         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2554                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2555
2556         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2557                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2558         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2559                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2560
2561         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2562                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2563         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2564                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2565
2566         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2567                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2568         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2569                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2570
2571         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2572                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2573         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2574                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2575
2576         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2577                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2578         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2579                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2580
2581         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2582                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2583         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2584                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2585
2586         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2587                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2588         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2589                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2590
2591         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2592                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2593         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2594                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2595 }
2596
2597 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2598 {
2599         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2600
2601         if (hdev->asic_prop.fw_security_enabled)
2602                 return;
2603
2604         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2605                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2606                 return;
2607
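        /*
         * Default read/write credit counts for the HBM0/HBM1 ports of each
         * DMA_IF, used only when the firmware has not already configured the
         * HBM credits (checked above).
         */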
2608         hbm0_wr = 0x33333333;
2609         hbm0_rd = 0x77777777;
2610         hbm1_wr = 0x55555555;
2611         hbm1_rd = 0xDDDDDDDD;
2612
2613         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2614         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2615         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2616         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2617
2618         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2619         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2620         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2621         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2622
2623         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2624         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2625         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2626         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2627
2628         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2629         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2630         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2631         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2632
2633         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2634                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2635                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2636         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2637                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2638                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2639         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2640                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2641                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2642         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2643                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2644                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2645
2646         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2647                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2648                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2649         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2650                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2651                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2652         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2653                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2654                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2655         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2656                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2657                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2658 }
2659
2660 static void gaudi_init_golden_registers(struct hl_device *hdev)
2661 {
2662         u32 tpc_offset;
2663         int tpc_id, i;
2664
2665         gaudi_init_e2e(hdev);
2666         gaudi_init_hbm_cred(hdev);
2667
2668         for (tpc_id = 0, tpc_offset = 0;
2669                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2670                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2671                 /* Mask all arithmetic interrupts from TPC */
2672                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2673                 /* Set 16 cache lines */
2674                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2675                                 ICACHE_FETCH_LINE_NUM, 2);
2676         }
2677
2678         /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2679         for (i = 0 ; i < 128 ; i += 8)
2680                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2681
2682         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2683         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2684         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2685         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2686 }
2687
2688 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2689                                         int qman_id, dma_addr_t qman_pq_addr)
2690 {
2691         struct cpu_dyn_regs *dyn_regs =
2692                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2693         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2694         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2695         u32 q_off, dma_qm_offset;
2696         u32 dma_qm_err_cfg, irq_handler_offset;
2697
2698         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2699
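        /*
         * CP MSG_BASE 0/1 are pointed at the east-north sync manager (monitor
         * payload address and sync objects), while MSG_BASE 2/3 are pointed
         * at the west-south sync manager.
         */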
2700         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2701                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2702         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2703                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2704         so_base_en_lo = lower_32_bits(CFG_BASE +
2705                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2706         so_base_en_hi = upper_32_bits(CFG_BASE +
2707                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2708         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2709                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2710         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2711                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2712         so_base_ws_lo = lower_32_bits(CFG_BASE +
2713                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2714         so_base_ws_hi = upper_32_bits(CFG_BASE +
2715                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2716
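        /*
         * The per-stream QMAN registers are arrays of consecutive 32-bit
         * entries, so stream qman_id is reached by adding qman_id * 4 to the
         * QMAN base offset.
         */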
2717         q_off = dma_qm_offset + qman_id * 4;
2718
2719         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2720         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2721
2722         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2723         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2724         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2725
2726         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2727         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2728                                                         QMAN_LDMA_SRC_OFFSET);
2729         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2730                                                         QMAN_LDMA_DST_OFFSET);
2731
2732         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2733         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2734         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2735         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2736         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2737         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2738         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2739         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2740
2741         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2742
2743         /* The following configuration is needed only once per QMAN */
2744         if (qman_id == 0) {
2745                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2746                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2747                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2748
2749                 /* Configure RAZWI IRQ */
2750                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2751                 if (hdev->stop_on_err)
2752                         dma_qm_err_cfg |=
2753                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2754
2755                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2756
2757                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2758                         lower_32_bits(CFG_BASE + irq_handler_offset));
2759                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2760                         upper_32_bits(CFG_BASE + irq_handler_offset));
2761
2762                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2763                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2764                                                                         dma_id);
2765
2766                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2767                                 QM_ARB_ERR_MSG_EN_MASK);
2768
2769                 /* Increase ARB WDT to support streams architecture */
2770                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2771                                 GAUDI_ARB_WDT_TIMEOUT);
2772
2773                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2774                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2775
2776                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2777         }
2778 }
2779
2780 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2781 {
2782         struct cpu_dyn_regs *dyn_regs =
2783                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2784         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2785         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2786         u32 irq_handler_offset;
2787
2788         /* Set to maximum possible according to physical size (0 = maximum) */
2789         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2790         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2791
2792         /* WA for H/W bug H3-2116 */
2793         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2794
2795         /* The STOP_ON bit implies the operation is not completed in case of RAZWI */
2796         if (hdev->stop_on_err)
2797                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2798
2799         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2800
2801         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2802                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2803                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2804
2805         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2806                 lower_32_bits(CFG_BASE + irq_handler_offset));
2807         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2808                 upper_32_bits(CFG_BASE + irq_handler_offset));
2809
2810         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2811                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2812         WREG32(mmDMA0_CORE_PROT + dma_offset,
2813                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2814         /* If the channel is secured, it should be in MMU bypass mode */
2815         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2816                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2817         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2818 }
2819
2820 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2821                                 u32 enable_mask)
2822 {
2823         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2824
2825         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2826 }
2827
2828 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2829 {
2830         struct gaudi_device *gaudi = hdev->asic_specific;
2831         struct hl_hw_queue *q;
2832         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2833
2834         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2835                 return;
2836
2837         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2838                 dma_id = gaudi_dma_assignment[i];
2839                 /*
2840                  * For queues after the CPU Q we need to add 1 to get the
2841                  * correct queue index. In addition, we need to skip over the
2842                  * CPU EQ and NIC IRQs in order to get the correct MSI vector.
2843                  */
2844                 if (dma_id > 1) {
2845                         cpu_skip = 1;
2846                         nic_skip = NIC_NUMBER_OF_ENGINES;
2847                 } else {
2848                         cpu_skip = 0;
2849                         nic_skip = 0;
2850                 }
2851
2852                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2853                         q_idx = 4 * dma_id + j + cpu_skip;
2854                         q = &hdev->kernel_queues[q_idx];
2855                         q->cq_id = cq_id++;
2856                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2857                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2858                                                 q->bus_address);
2859                 }
2860
2861                 gaudi_init_dma_core(hdev, dma_id);
2862
2863                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2864         }
2865
2866         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2867 }
2868
2869 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2870                                         int qman_id, u64 qman_base_addr)
2871 {
2872         struct cpu_dyn_regs *dyn_regs =
2873                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2874         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2875         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2876         u32 dma_qm_err_cfg, irq_handler_offset;
2877         u32 q_off, dma_qm_offset;
2878
2879         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2880
2881         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2882                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2883         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2884                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2885         so_base_en_lo = lower_32_bits(CFG_BASE +
2886                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2887         so_base_en_hi = upper_32_bits(CFG_BASE +
2888                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2889         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2890                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2891         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2892                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2893         so_base_ws_lo = lower_32_bits(CFG_BASE +
2894                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2895         so_base_ws_hi = upper_32_bits(CFG_BASE +
2896                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2897
2898         q_off = dma_qm_offset + qman_id * 4;
2899
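        /*
         * Streams 0-3 are the upper CPs and each has its own PQ. Stream 4 is
         * the lower CP, which has no PQ; the QMAN-wide error reporting and
         * global configuration are programmed when it is initialized.
         */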
2900         if (qman_id < 4) {
2901                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2902                                         lower_32_bits(qman_base_addr));
2903                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2904                                         upper_32_bits(qman_base_addr));
2905
2906                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2907                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2908                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2909
2910                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2911                                                         QMAN_CPDMA_SIZE_OFFSET);
2912                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2913                                                         QMAN_CPDMA_SRC_OFFSET);
2914                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2915                                                         QMAN_CPDMA_DST_OFFSET);
2916         } else {
2917                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2918                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2919                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2920
2921                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2922                                                         QMAN_LDMA_SIZE_OFFSET);
2923                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2924                                                         QMAN_LDMA_SRC_OFFSET);
2925                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2926                                                         QMAN_LDMA_DST_OFFSET);
2927
2928                 /* Configure RAZWI IRQ */
2929                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2930                 if (hdev->stop_on_err)
2931                         dma_qm_err_cfg |=
2932                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2933
2934                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2935
2936                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2937                         lower_32_bits(CFG_BASE + irq_handler_offset));
2938                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2939                         upper_32_bits(CFG_BASE + irq_handler_offset));
2940
2941                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2942                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2943                                                                         dma_id);
2944
2945                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2946                                 QM_ARB_ERR_MSG_EN_MASK);
2947
2948                 /* Increase ARB WDT to support streams architecture */
2949                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2950                                 GAUDI_ARB_WDT_TIMEOUT);
2951
2952                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2953                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2954                                 QMAN_INTERNAL_MAKE_TRUSTED);
2955         }
2956
2957         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2958         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2959         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2960         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2961
2962         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2963         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2964                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2965                                 mtr_base_ws_lo);
2966                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2967                                 mtr_base_ws_hi);
2968                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2969                                 so_base_ws_lo);
2970                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2971                                 so_base_ws_hi);
2972         }
2973 }
2974
2975 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2976 {
2977         struct gaudi_device *gaudi = hdev->asic_specific;
2978         struct gaudi_internal_qman_info *q;
2979         u64 qman_base_addr;
2980         int i, j, dma_id, internal_q_index;
2981
2982         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2983                 return;
2984
2985         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2986                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2987
2988                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2989                          /*
2990                           * Add the CPU queue in order to get the correct queue
2991                           * number, as all internal queues are placed after it
2992                           */
2993                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2994
2995                         q = &gaudi->internal_qmans[internal_q_index];
2996                         qman_base_addr = (u64) q->pq_dma_addr;
2997                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2998                                                 qman_base_addr);
2999                 }
3000
3001                 /* Initializing lower CP for HBM DMA QMAN */
3002                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
3003
3004                 gaudi_init_dma_core(hdev, dma_id);
3005
3006                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
3007         }
3008
3009         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
3010 }
3011
3012 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
3013                                         int qman_id, u64 qman_base_addr)
3014 {
3015         struct cpu_dyn_regs *dyn_regs =
3016                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3017         u32 mtr_base_lo, mtr_base_hi;
3018         u32 so_base_lo, so_base_hi;
3019         u32 irq_handler_offset;
3020         u32 q_off, mme_id;
3021         u32 mme_qm_err_cfg;
3022
3023         mtr_base_lo = lower_32_bits(CFG_BASE +
3024                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3025         mtr_base_hi = upper_32_bits(CFG_BASE +
3026                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3027         so_base_lo = lower_32_bits(CFG_BASE +
3028                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3029         so_base_hi = upper_32_bits(CFG_BASE +
3030                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3031
3032         q_off = mme_offset + qman_id * 4;
3033
3034         if (qman_id < 4) {
3035                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
3036                                         lower_32_bits(qman_base_addr));
3037                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
3038                                         upper_32_bits(qman_base_addr));
3039
3040                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
3041                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
3042                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
3043
3044                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3045                                                         QMAN_CPDMA_SIZE_OFFSET);
3046                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3047                                                         QMAN_CPDMA_SRC_OFFSET);
3048                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3049                                                         QMAN_CPDMA_DST_OFFSET);
3050         } else {
3051                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3052                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3053                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
3054
3055                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3056                                                         QMAN_LDMA_SIZE_OFFSET);
3057                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3058                                                         QMAN_LDMA_SRC_OFFSET);
3059                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3060                                                         QMAN_LDMA_DST_OFFSET);
3061
3062                 /* Configure RAZWI IRQ */
3063                 mme_id = mme_offset /
3064                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
3065
3066                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3067                 if (hdev->stop_on_err)
3068                         mme_qm_err_cfg |=
3069                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3070
3071                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
3072
3073                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
3074                         lower_32_bits(CFG_BASE + irq_handler_offset));
3075                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
3076                         upper_32_bits(CFG_BASE + irq_handler_offset));
3077
3078                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
3079                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
3080                                                                         mme_id);
3081
3082                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
3083                                 QM_ARB_ERR_MSG_EN_MASK);
3084
3085                 /* Increase ARB WDT to support streams architecture */
3086                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
3087                                 GAUDI_ARB_WDT_TIMEOUT);
3088
3089                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
3090                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
3091                                 QMAN_INTERNAL_MAKE_TRUSTED);
3092         }
3093
3094         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
3095         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
3096         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
3097         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
3098 }
3099
3100 static void gaudi_init_mme_qmans(struct hl_device *hdev)
3101 {
3102         struct gaudi_device *gaudi = hdev->asic_specific;
3103         struct gaudi_internal_qman_info *q;
3104         u64 qman_base_addr;
3105         u32 mme_offset;
3106         int i, internal_q_index;
3107
3108         if (gaudi->hw_cap_initialized & HW_CAP_MME)
3109                 return;
3110
3111         /*
3112          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
3113          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
3114          */
3115
3116         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3117
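        /*
         * The first four streams (i = 0-3) use the MME2 offset set above;
         * after i == 3 the offset is reset to 0 so the remaining streams
         * program MME0, matching the queue-ID mapping described above.
         */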
3118         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
3119                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
3120                 q = &gaudi->internal_qmans[internal_q_index];
3121                 qman_base_addr = (u64) q->pq_dma_addr;
3122                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
3123                                         qman_base_addr);
3124                 if (i == 3)
3125                         mme_offset = 0;
3126         }
3127
3128         /* Initializing lower CP for MME QMANs */
3129         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
3130         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
3131         gaudi_init_mme_qman(hdev, 0, 4, 0);
3132
3133         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3134         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
3135
3136         gaudi->hw_cap_initialized |= HW_CAP_MME;
3137 }
3138
3139 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
3140                                 int qman_id, u64 qman_base_addr)
3141 {
3142         struct cpu_dyn_regs *dyn_regs =
3143                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3144         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3145         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3146         u32 tpc_qm_err_cfg, irq_handler_offset;
3147         u32 q_off, tpc_id;
3148
3149         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3150                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3151         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3152                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3153         so_base_en_lo = lower_32_bits(CFG_BASE +
3154                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3155         so_base_en_hi = upper_32_bits(CFG_BASE +
3156                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3157         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3158                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3159         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3160                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3161         so_base_ws_lo = lower_32_bits(CFG_BASE +
3162                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3163         so_base_ws_hi = upper_32_bits(CFG_BASE +
3164                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3165
3166         q_off = tpc_offset + qman_id * 4;
3167
3168         tpc_id = tpc_offset /
3169                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3170
3171         if (qman_id < 4) {
3172                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3173                                         lower_32_bits(qman_base_addr));
3174                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3175                                         upper_32_bits(qman_base_addr));
3176
3177                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3178                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3179                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3180
3181                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3182                                                         QMAN_CPDMA_SIZE_OFFSET);
3183                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3184                                                         QMAN_CPDMA_SRC_OFFSET);
3185                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3186                                                         QMAN_CPDMA_DST_OFFSET);
3187         } else {
3188                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3189                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3190                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3191
3192                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3193                                                         QMAN_LDMA_SIZE_OFFSET);
3194                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3195                                                         QMAN_LDMA_SRC_OFFSET);
3196                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3197                                                         QMAN_LDMA_DST_OFFSET);
3198
3199                 /* Configure RAZWI IRQ */
3200                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3201                 if (hdev->stop_on_err)
3202                         tpc_qm_err_cfg |=
3203                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3204
3205                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3206
3207                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3208                         lower_32_bits(CFG_BASE + irq_handler_offset));
3209                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3210                         upper_32_bits(CFG_BASE + irq_handler_offset));
3211
3212                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3213                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3214                                                                         tpc_id);
3215
3216                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3217                                 QM_ARB_ERR_MSG_EN_MASK);
3218
3219                 /* Increase ARB WDT to support streams architecture */
3220                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3221                                 GAUDI_ARB_WDT_TIMEOUT);
3222
3223                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3224                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3225                                 QMAN_INTERNAL_MAKE_TRUSTED);
3226         }
3227
3228         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3229         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3230         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3231         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3232
3233         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3234         if (tpc_id == 6) {
3235                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3236                                 mtr_base_ws_lo);
3237                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3238                                 mtr_base_ws_hi);
3239                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3240                                 so_base_ws_lo);
3241                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3242                                 so_base_ws_hi);
3243         }
3244 }
3245
3246 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3247 {
3248         struct gaudi_device *gaudi = hdev->asic_specific;
3249         struct gaudi_internal_qman_info *q;
3250         u64 qman_base_addr;
3251         u32 so_base_hi, tpc_offset = 0;
3252         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3253                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3254         int i, tpc_id, internal_q_index;
3255
3256         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3257                 return;
3258
3259         so_base_hi = upper_32_bits(CFG_BASE +
3260                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3261
3262         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3263                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3264                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3265                                                 tpc_id * QMAN_STREAMS + i;
3266                         q = &gaudi->internal_qmans[internal_q_index];
3267                         qman_base_addr = (u64) q->pq_dma_addr;
3268                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3269                                                 qman_base_addr);
3270
3271                         if (i == 3) {
3272                                 /* Initializing lower CP for TPC QMAN */
3273                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3274
3275                                 /* Enable the QMAN and TPC channel */
3276                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3277                                                 QMAN_TPC_ENABLE);
3278                         }
3279                 }
3280
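                /* Point the TPC's sync manager base (upper 32 bits) at the sync objects */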
3281                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3282                                 so_base_hi);
3283
3284                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3285
3286                 gaudi->hw_cap_initialized |=
3287                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3288         }
3289 }
3290
3291 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3292                                 int qman_id, u64 qman_base_addr, int nic_id)
3293 {
3294         struct cpu_dyn_regs *dyn_regs =
3295                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3296         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3297         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3298         u32 nic_qm_err_cfg, irq_handler_offset;
3299         u32 q_off;
3300
3301         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3302                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3303         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3304                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3305         so_base_en_lo = lower_32_bits(CFG_BASE +
3306                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3307         so_base_en_hi = upper_32_bits(CFG_BASE +
3308                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3309         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3310                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3311         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3312                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3313         so_base_ws_lo = lower_32_bits(CFG_BASE +
3314                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3315         so_base_ws_hi = upper_32_bits(CFG_BASE +
3316                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3317
3318         q_off = nic_offset + qman_id * 4;
3319
3320         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3321         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3322
3323         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3324         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3325         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3326
3327         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3328                                                         QMAN_LDMA_SIZE_OFFSET);
3329         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3330                                                         QMAN_LDMA_SRC_OFFSET);
3331         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3332                                                         QMAN_LDMA_DST_OFFSET);
3333
3334         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3335         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3336         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3337         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3338
3339         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3340         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3341         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3342         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3343         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3344
3345         if (qman_id == 0) {
3346                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3347                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3348                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3349
3350                 /* Configure RAZWI IRQ */
3351                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3352                 if (hdev->stop_on_err)
3353                         nic_qm_err_cfg |=
3354                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3355
3356                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3357
3358                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3359                         lower_32_bits(CFG_BASE + irq_handler_offset));
3360                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3361                         upper_32_bits(CFG_BASE + irq_handler_offset));
3362
3363                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3364                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3365                                                                         nic_id);
3366
3367                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3368                                 QM_ARB_ERR_MSG_EN_MASK);
3369
3370                 /* Increase ARB WDT to support streams architecture */
3371                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3372                                 GAUDI_ARB_WDT_TIMEOUT);
3373
3374                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3375                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3376                                 QMAN_INTERNAL_MAKE_TRUSTED);
3377         }
3378 }
3379
3380 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3381 {
3382         struct gaudi_device *gaudi = hdev->asic_specific;
3383         struct gaudi_internal_qman_info *q;
3384         u64 qman_base_addr;
3385         u32 nic_offset = 0;
3386         u32 nic_delta_between_qmans =
3387                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3388         u32 nic_delta_between_nics =
3389                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3390         int i, nic_id, internal_q_index;
3391
3392         if (!hdev->nic_ports_mask)
3393                 return;
3394
3395         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3396                 return;
3397
3398         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3399
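        /*
         * Each NIC macro contains two QMANs. The offset advances by one QMAN
         * stride per port; after an odd port it is rewound by two QMAN strides
         * and advanced by the NIC-to-NIC stride to reach the next macro.
         */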
3400         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3401                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3402                         nic_offset += nic_delta_between_qmans;
3403                         if (nic_id & 1) {
3404                                 nic_offset -= (nic_delta_between_qmans * 2);
3405                                 nic_offset += nic_delta_between_nics;
3406                         }
3407                         continue;
3408                 }
3409
3410                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3411                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3412                                                 nic_id * QMAN_STREAMS + i;
3413                         q = &gaudi->internal_qmans[internal_q_index];
3414                         qman_base_addr = (u64) q->pq_dma_addr;
3415                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3416                                                 qman_base_addr, nic_id);
3417                 }
3418
3419                 /* Enable the QMAN */
3420                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3421
3422                 nic_offset += nic_delta_between_qmans;
3423                 if (nic_id & 1) {
3424                         nic_offset -= (nic_delta_between_qmans * 2);
3425                         nic_offset += nic_delta_between_nics;
3426                 }
3427
3428                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3429         }
3430 }
3431
3432 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3433 {
3434         struct gaudi_device *gaudi = hdev->asic_specific;
3435
3436         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3437                 return;
3438
3439         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3440         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3441         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3442 }
3443
3444 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3445 {
3446         struct gaudi_device *gaudi = hdev->asic_specific;
3447
3448         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3449                 return;
3450
3451         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3452         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3453         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3454         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3455         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3456 }
3457
3458 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3459 {
3460         struct gaudi_device *gaudi = hdev->asic_specific;
3461
3462         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3463                 return;
3464
3465         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3466         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3467 }
3468
3469 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3470 {
3471         struct gaudi_device *gaudi = hdev->asic_specific;
3472         u32 tpc_offset = 0;
3473         int tpc_id;
3474
3475         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3476                 return;
3477
3478         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3479                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3480                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3481         }
3482 }
3483
3484 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3485 {
3486         struct gaudi_device *gaudi = hdev->asic_specific;
3487         u32 nic_mask, nic_offset = 0;
3488         u32 nic_delta_between_qmans =
3489                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3490         u32 nic_delta_between_nics =
3491                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3492         int nic_id;
3493
3494         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3495                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3496
3497                 if (gaudi->hw_cap_initialized & nic_mask)
3498                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3499
3500                 nic_offset += nic_delta_between_qmans;
3501                 if (nic_id & 1) {
3502                         nic_offset -= (nic_delta_between_qmans * 2);
3503                         nic_offset += nic_delta_between_nics;
3504                 }
3505         }
3506 }
3507
3508 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3509 {
3510         struct gaudi_device *gaudi = hdev->asic_specific;
3511
3512         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3513                 return;
3514
3515         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3516         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3517         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3518         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3519 }
3520
3521 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3522 {
3523         struct gaudi_device *gaudi = hdev->asic_specific;
3524
3525         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3526                 return;
3527
3528         /* Stop CPs of HBM DMA QMANs */
3529
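        /* 0x1F stops all five CPs (the four upper CPs and the lower CP) */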
3530         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3531         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3532         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3533         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3534         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3535 }
3536
3537 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3538 {
3539         struct gaudi_device *gaudi = hdev->asic_specific;
3540
3541         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3542                 return;
3543
3544         /* Stop CPs of MME QMANs */
3545         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3546         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3547 }
3548
3549 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3550 {
3551         struct gaudi_device *gaudi = hdev->asic_specific;
3552
3553         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3554                 return;
3555
3556         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3557         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3558         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3559         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3560         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3561         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3562         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3563         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3564 }
3565
3566 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3567 {
3568         struct gaudi_device *gaudi = hdev->asic_specific;
3569
3570         /* Stop upper CPs of QMANs */
3571
3572         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3573                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3574                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3575                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3576                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3577
3578         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3579                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3580                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3581                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3582                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3583
3584         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3585                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3586                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3587                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3588                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3589
3590         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3591                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3592                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3593                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3594                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3595
3596         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3597                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3598                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3599                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3600                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3601
3602         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3603                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3604                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3605                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3606                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3607
3608         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3609                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3610                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3611                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3612                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3613
3614         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3615                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3616                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3617                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3618                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3619
3620         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3621                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3622                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3623                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3624                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3625
3626         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3627                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3628                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3629                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3630                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3631 }
3632
3633 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3634 {
3635         struct gaudi_device *gaudi = hdev->asic_specific;
3636
3637         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3638                 return;
3639
3640         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3641         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3642         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3643 }
3644
3645 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3646 {
3647         struct gaudi_device *gaudi = hdev->asic_specific;
3648
3649         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3650                 return;
3651
3652         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3653         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3654         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3655         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3656         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3657 }
3658
3659 static void gaudi_mme_stall(struct hl_device *hdev)
3660 {
3661         struct gaudi_device *gaudi = hdev->asic_specific;
3662
3663         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3664                 return;
3665
3666         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3667         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3668         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3669         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3670         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3671         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3672         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3673         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3674         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3675         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3676         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3677         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3678         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3679         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3680         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3681         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3682         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3683 }
3684
3685 static void gaudi_tpc_stall(struct hl_device *hdev)
3686 {
3687         struct gaudi_device *gaudi = hdev->asic_specific;
3688
3689         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3690                 return;
3691
3692         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3693         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3694         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3695         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3696         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3697         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3698         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3699         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3700 }
3701
3702 static void gaudi_set_clock_gating(struct hl_device *hdev)
3703 {
3704         struct gaudi_device *gaudi = hdev->asic_specific;
3705         u32 qman_offset;
3706         bool enable;
3707         int i;
3708
3709         /* If we are in a debug session, don't enable the clock gate as it
3710          * may interfere
3711          */
3712         if (hdev->in_debug)
3713                 return;
3714
3715         if (hdev->asic_prop.fw_security_enabled)
3716                 return;
3717
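        /*
         * Each bit in hdev->clock_gating_mask corresponds to an engine ID;
         * clock gating is enabled for a QMAN only if its engine's bit is set.
         */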
3718         for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3719                 enable = !!(hdev->clock_gating_mask &
3720                                 (BIT_ULL(gaudi_dma_assignment[i])));
3721
3722                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3723                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3724                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3725                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3726                                 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3727         }
3728
3729         for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3730                 enable = !!(hdev->clock_gating_mask &
3731                                 (BIT_ULL(gaudi_dma_assignment[i])));
3732
3733                 /* GC sends work to the DMA engine through the upper CP in
3734                  * DMA5, so we must not enable clock gating in that DMA
3735                  */
3736                 if (i == GAUDI_HBM_DMA_4)
3737                         enable = 0;
3738
3739                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3740                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3741                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3742                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3743                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3744         }
3745
3746         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3747         WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3748         WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3749
3750         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3751         WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3752         WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3753
3754         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3755                 enable = !!(hdev->clock_gating_mask &
3756                                 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3757
3758                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3759                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3760                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3761                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3762
3763                 qman_offset += TPC_QMAN_OFFSET;
3764         }
3765
3766         gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3767 }
3768
3769 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3770 {
3771         struct gaudi_device *gaudi = hdev->asic_specific;
3772         u32 qman_offset;
3773         int i;
3774
3775         if (hdev->asic_prop.fw_security_enabled)
3776                 return;
3777
3778         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3779                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3780                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3781
3782                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3783         }
3784
3785         WREG32(mmMME0_QM_CGM_CFG, 0);
3786         WREG32(mmMME0_QM_CGM_CFG1, 0);
3787         WREG32(mmMME2_QM_CGM_CFG, 0);
3788         WREG32(mmMME2_QM_CGM_CFG1, 0);
3789
3790         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3791                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3792                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3793
3794                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3795         }
3796
3797         gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3798 }
3799
3800 static void gaudi_enable_timestamp(struct hl_device *hdev)
3801 {
3802         /* Disable the timestamp counter */
3803         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3804
3805         /* Zero the lower/upper parts of the 64-bit counter */
3806         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3807         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3808
3809         /* Enable the counter */
3810         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3811 }
3812
3813 static void gaudi_disable_timestamp(struct hl_device *hdev)
3814 {
3815         /* Disable the timestamp counter */
3816         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3817 }
3818
3819 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3820 {
3821         u32 wait_timeout_ms;
3822
3823         dev_info(hdev->dev,
3824                 "Halting compute engines and disabling interrupts\n");
3825
3826         if (hdev->pldm)
3827                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3828         else
3829                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3830
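             /* Ordering: first stop the QMANs so no new work is dispatched,
              * then stall the engines themselves, and only then disable the
              * QMANs
              */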
3831         gaudi_stop_nic_qmans(hdev);
3832         gaudi_stop_mme_qmans(hdev);
3833         gaudi_stop_tpc_qmans(hdev);
3834         gaudi_stop_hbm_dma_qmans(hdev);
3835         gaudi_stop_pci_dma_qmans(hdev);
3836
3837         hdev->asic_funcs->disable_clock_gating(hdev);
3838
3839         msleep(wait_timeout_ms);
3840
3841         gaudi_pci_dma_stall(hdev);
3842         gaudi_hbm_dma_stall(hdev);
3843         gaudi_tpc_stall(hdev);
3844         gaudi_mme_stall(hdev);
3845
3846         msleep(wait_timeout_ms);
3847
3848         gaudi_disable_nic_qmans(hdev);
3849         gaudi_disable_mme_qmans(hdev);
3850         gaudi_disable_tpc_qmans(hdev);
3851         gaudi_disable_hbm_dma_qmans(hdev);
3852         gaudi_disable_pci_dma_qmans(hdev);
3853
3854         gaudi_disable_timestamp(hdev);
3855
3856         gaudi_disable_msi(hdev);
3857 }
3858
3859 static int gaudi_mmu_init(struct hl_device *hdev)
3860 {
3861         struct asic_fixed_properties *prop = &hdev->asic_prop;
3862         struct gaudi_device *gaudi = hdev->asic_specific;
3863         u64 hop0_addr;
3864         int rc, i;
3865
3866         if (!hdev->mmu_enable)
3867                 return 0;
3868
3869         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3870                 return 0;
3871
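             /* Set a hop-0 page table address for every ASID; the hop-0 tables
              * are laid out consecutively, starting at the MMU page-tables base
              */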
3872         for (i = 0 ; i < prop->max_asid ; i++) {
3873                 hop0_addr = prop->mmu_pgt_addr +
3874                                 (i * prop->mmu_hop_table_size);
3875
3876                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3877                 if (rc) {
3878                         dev_err(hdev->dev,
3879                                 "failed to set hop0 addr for asid %d\n", i);
3880                         goto err;
3881                 }
3882         }
3883
3884         /* init MMU cache manage page */
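             /* The cache-management address is split between two registers:
              * bits [39:8] and bits [49:40]
              */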
3885         WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3886         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3887
3888         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3889
3890         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3891         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3892
3893         WREG32(mmSTLB_HOP_CONFIGURATION,
3894                         hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3895
3896         /*
3897          * The H/W expects the first PI after init to be 1. After wraparound
3898          * we'll write 0.
3899          */
3900         gaudi->mmu_cache_inv_pi = 1;
3901
3902         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3903
3904         return 0;
3905
3906 err:
3907         return rc;
3908 }
3909
3910 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3911 {
3912         void __iomem *dst;
3913
3914         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3915
3916         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3917 }
3918
3919 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3920 {
3921         void __iomem *dst;
3922
3923         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3924
3925         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3926 }
3927
3928 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3929 {
3930         struct dynamic_fw_load_mgr *dynamic_loader;
3931         struct cpu_dyn_regs *dyn_regs;
3932
3933         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3934
3935         /*
3936          * Here we set initial values for a few specific dynamic registers;
3937          * before the first descriptor is read from the FW, these values have
3938          * to be hard-coded. In later stages of the protocol they are updated
3939          * automatically by reading the FW descriptor, so the data there is
3940          * always up-to-date.
3941          */
3942         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3943         dyn_regs->kmd_msg_to_cpu =
3944                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3945         dyn_regs->cpu_cmd_status_to_host =
3946                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3947
3948         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3949 }
3950
3951 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3952 {
3953         struct static_fw_load_mgr *static_loader;
3954
3955         static_loader = &hdev->fw_loader.static_loader;
3956
3957         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3958         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3959         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3960         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3961         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3962         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3963         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3964         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3965         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3966         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3967         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3968         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3969         static_loader->cpu_reset_wait_msec = hdev->pldm ?
3970                         GAUDI_PLDM_RESET_WAIT_MSEC :
3971                         GAUDI_CPU_RESET_WAIT_MSEC;
3972 }
3973
3974 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3975 {
3976         struct asic_fixed_properties *prop = &hdev->asic_prop;
3977         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3978
3979         /* fill common fields */
3980         fw_loader->linux_loaded = false;
3981         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3982         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3983         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3984         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3985         fw_loader->skip_bmc = !hdev->bmc_enable;
3986         fw_loader->sram_bar_id = SRAM_BAR_ID;
3987         fw_loader->dram_bar_id = HBM_BAR_ID;
3988
3989         if (prop->dynamic_fw_load)
3990                 gaudi_init_dynamic_firmware_loader(hdev);
3991         else
3992                 gaudi_init_static_firmware_loader(hdev);
3993 }
3994
3995 static int gaudi_init_cpu(struct hl_device *hdev)
3996 {
3997         struct gaudi_device *gaudi = hdev->asic_specific;
3998         int rc;
3999
4000         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4001                 return 0;
4002
4003         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
4004                 return 0;
4005
4006         /*
4007          * The device CPU works with 40-bit addresses.
4008          * This register sets the extension to 50 bits.
4009          */
4010         if (!hdev->asic_prop.fw_security_enabled)
4011                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
4012
4013         rc = hl_fw_init_cpu(hdev);
4014
4015         if (rc)
4016                 return rc;
4017
4018         gaudi->hw_cap_initialized |= HW_CAP_CPU;
4019
4020         return 0;
4021 }
4022
4023 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4024 {
4025         struct cpu_dyn_regs *dyn_regs =
4026                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4027         struct asic_fixed_properties *prop = &hdev->asic_prop;
4028         struct gaudi_device *gaudi = hdev->asic_specific;
4029         u32 status, irq_handler_offset;
4030         struct hl_eq *eq;
4031         struct hl_hw_queue *cpu_pq =
4032                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
4033         int err;
4034
4035         if (!hdev->cpu_queues_enable)
4036                 return 0;
4037
4038         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4039                 return 0;
4040
4041         eq = &hdev->event_queue;
4042
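             /* Publish the host addresses and sizes of the PQ, the EQ and the
              * CPU-accessible memory region to the device CPU through the
              * CPU_IF registers
              */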
4043         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4044         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4045
4046         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4047         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4048
4049         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
4050                         lower_32_bits(hdev->cpu_accessible_dma_address));
4051         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
4052                         upper_32_bits(hdev->cpu_accessible_dma_address));
4053
4054         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4055         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4056         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4057
4058         /* Used for EQ CI */
4059         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4060
4061         WREG32(mmCPU_IF_PF_PQ_PI, 0);
4062
4063         if (gaudi->multi_msi_mode)
4064                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4065         else
4066                 WREG32(mmCPU_IF_QUEUE_INIT,
4067                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
4068
4069         irq_handler_offset = prop->gic_interrupts_enable ?
4070                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4071                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4072
4073         WREG32(irq_handler_offset,
4074                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4075
4076         err = hl_poll_timeout(
4077                 hdev,
4078                 mmCPU_IF_QUEUE_INIT,
4079                 status,
4080                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4081                 1000,
4082                 cpu_timeout);
4083
4084         if (err) {
4085                 dev_err(hdev->dev,
4086                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
4087                 return -EIO;
4088         }
4089
4090         /* update FW application security bits */
4091         if (prop->fw_cpu_boot_dev_sts0_valid)
4092                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
4093         if (prop->fw_cpu_boot_dev_sts1_valid)
4094                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
4095
4096         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
4097         return 0;
4098 }
4099
4100 static void gaudi_pre_hw_init(struct hl_device *hdev)
4101 {
4102         /* Perform read from the device to make sure device is up */
4103         RREG32(mmHW_STATE);
4104
4105         if (!hdev->asic_prop.fw_security_enabled) {
4106                 /* Set the access through PCI bars (Linux driver only) as
4107                  * secured
4108                  */
4109                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
4110                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
4111                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
4112
4113                 /* Perform read to flush the waiting writes to ensure
4114                  * configuration was set in the device
4115                  */
4116                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
4117         }
4118
4119         /*
4120          * Let's mark in the H/W that we have reached this point. We check
4121          * this value in the reset_before_init function to understand whether
4122          * we need to reset the chip before doing H/W init. This register is
4123          * cleared by the H/W upon H/W reset
4124          */
4125         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
4126 }
4127
4128 static int gaudi_hw_init(struct hl_device *hdev)
4129 {
4130         struct gaudi_device *gaudi = hdev->asic_specific;
4131         int rc;
4132
4133         gaudi_pre_hw_init(hdev);
4134
4135         /* If the iATU is configured by the FW, the HBM BAR ALWAYS points to
4136          * DRAM_PHYS_BASE. So we set it here, and if anyone tries to move it
4137          * later to a different address, there will be an error
4138          */
4139         if (hdev->asic_prop.iatu_done_by_fw)
4140                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
4141
4142         /*
4143          * Before pushing u-boot/linux to the device, we need to set the HBM
4144          * BAR to the base address of DRAM
4145          */
4146         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4147                 dev_err(hdev->dev,
4148                         "failed to map HBM bar to DRAM base address\n");
4149                 return -EIO;
4150         }
4151
4152         rc = gaudi_init_cpu(hdev);
4153         if (rc) {
4154                 dev_err(hdev->dev, "failed to initialize CPU\n");
4155                 return rc;
4156         }
4157
4158         /* In case clock gating was enabled in preboot, we need to disable it
4159          * here before touching the MME/TPC registers.
4160          * There is no need to take the clock gating mutex because when this
4161          * function runs, no other relevant code can run
4162          */
4163         hdev->asic_funcs->disable_clock_gating(hdev);
4164
4165         /* SRAM scrambler must be initialized after CPU is running from HBM */
4166         gaudi_init_scrambler_sram(hdev);
4167
4168         /* This is here just in case we are working without a CPU */
4169         gaudi_init_scrambler_hbm(hdev);
4170
4171         gaudi_init_golden_registers(hdev);
4172
4173         rc = gaudi_mmu_init(hdev);
4174         if (rc)
4175                 return rc;
4176
4177         gaudi_init_security(hdev);
4178
4179         gaudi_init_pci_dma_qmans(hdev);
4180
4181         gaudi_init_hbm_dma_qmans(hdev);
4182
4183         gaudi_init_mme_qmans(hdev);
4184
4185         gaudi_init_tpc_qmans(hdev);
4186
4187         gaudi_init_nic_qmans(hdev);
4188
4189         hdev->asic_funcs->set_clock_gating(hdev);
4190
4191         gaudi_enable_timestamp(hdev);
4192
4193         /* MSI must be enabled before CPU queues and NIC are initialized */
4194         rc = gaudi_enable_msi(hdev);
4195         if (rc)
4196                 goto disable_queues;
4197
4198         /* must be called after MSI was enabled */
4199         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4200         if (rc) {
4201                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4202                         rc);
4203                 goto disable_msi;
4204         }
4205
4206         /* Perform read from the device to flush all configuration */
4207         RREG32(mmHW_STATE);
4208
4209         return 0;
4210
4211 disable_msi:
4212         gaudi_disable_msi(hdev);
4213 disable_queues:
4214         gaudi_disable_mme_qmans(hdev);
4215         gaudi_disable_pci_dma_qmans(hdev);
4216
4217         return rc;
4218 }
4219
4220 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
4221 {
4222         struct cpu_dyn_regs *dyn_regs =
4223                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4224         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4225         struct gaudi_device *gaudi = hdev->asic_specific;
4226         bool driver_performs_reset;
4227
4228         if (!hard_reset) {
4229                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4230                 return;
4231         }
4232
4233         if (hdev->pldm) {
4234                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4235                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4236         } else {
4237                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4238                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4239         }
4240
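             /* The driver resets the chip itself only when FW security is
              * disabled and the hard reset is not performed by the FW
              */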
4241         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4242                                         !hdev->asic_prop.hard_reset_done_by_fw);
4243
4244         /* Set the device to handle FLR by H/W, as we will put the device CPU
4245          * into halt mode
4246          */
4247         if (driver_performs_reset)
4248                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4249                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4250
4251         /* If Linux is loaded on the device CPU we need to communicate with
4252          * it via the GIC. Otherwise, we need to use COMMS, or the MSG_TO_CPU
4253          * registers in case of old F/Ws
4254          */
4255         if (hdev->fw_loader.linux_loaded) {
4256                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4257                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4258                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4259
4260                 WREG32(irq_handler_offset,
4261                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4262         } else {
4263                 if (hdev->asic_prop.hard_reset_done_by_fw)
4264                         hl_fw_ask_hard_reset_without_linux(hdev);
4265                 else
4266                         hl_fw_ask_halt_machine_without_linux(hdev);
4267         }
4268
4269         if (driver_performs_reset) {
4270
4271                 /* Configure the reset registers. Must be done as early as
4272                  * possible in case we fail during H/W initialization
4273                  */
4274                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4275                                                 (CFG_RST_H_DMA_MASK |
4276                                                 CFG_RST_H_MME_MASK |
4277                                                 CFG_RST_H_SM_MASK |
4278                                                 CFG_RST_H_TPC_7_MASK));
4279
4280                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4281
4282                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4283                                                 (CFG_RST_H_HBM_MASK |
4284                                                 CFG_RST_H_TPC_7_MASK |
4285                                                 CFG_RST_H_NIC_MASK |
4286                                                 CFG_RST_H_SM_MASK |
4287                                                 CFG_RST_H_DMA_MASK |
4288                                                 CFG_RST_H_MME_MASK |
4289                                                 CFG_RST_H_CPU_MASK |
4290                                                 CFG_RST_H_MMU_MASK));
4291
4292                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4293                                                 (CFG_RST_L_IF_MASK |
4294                                                 CFG_RST_L_PSOC_MASK |
4295                                                 CFG_RST_L_TPC_MASK));
4296
4297                 msleep(cpu_timeout_ms);
4298
4299                 /* Tell ASIC not to re-initialize PCIe */
4300                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4301
4302                 /* Restart BTL/BLR upon hard-reset */
4303                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4304
4305                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4306                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4307
4308                 dev_info(hdev->dev,
4309                         "Issued HARD reset command, going to wait %dms\n",
4310                         reset_timeout_ms);
4311         } else {
4312                 dev_info(hdev->dev,
4313                         "Firmware performs HARD reset, going to wait %dms\n",
4314                         reset_timeout_ms);
4315         }
4316
4317         /*
4318          * After hard reset, we can't poll the BTM_FSM register because the PSOC
4319          * itself is in reset. Need to wait until the reset is deasserted
4320          */
4321         msleep(reset_timeout_ms);
4322
4323         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4324         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4325                 dev_err(hdev->dev,
4326                         "Timeout while waiting for device to reset 0x%x\n",
4327                         status);
4328
4329         if (gaudi) {
4330                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4331                                 HW_CAP_HBM | HW_CAP_PCI_DMA |
4332                                 HW_CAP_MME | HW_CAP_TPC_MASK |
4333                                 HW_CAP_HBM_DMA | HW_CAP_PLL |
4334                                 HW_CAP_NIC_MASK | HW_CAP_MMU |
4335                                 HW_CAP_SRAM_SCRAMBLER |
4336                                 HW_CAP_HBM_SCRAMBLER |
4337                                 HW_CAP_CLK_GATE);
4338
4339                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4340
4341                 hdev->device_cpu_is_halted = false;
4342         }
4343 }
4344
4345 static int gaudi_suspend(struct hl_device *hdev)
4346 {
4347         int rc;
4348
4349         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4350         if (rc)
4351                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4352
4353         return rc;
4354 }
4355
4356 static int gaudi_resume(struct hl_device *hdev)
4357 {
4358         return gaudi_init_iatu(hdev);
4359 }
4360
4361 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4362                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4363 {
4364         int rc;
4365
4366         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4367                         VM_DONTCOPY | VM_NORESERVE;
4368
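             /* dma_addr is the device-side address of this host memory, so
              * remove HOST_PHYS_BASE to get back the CPU DMA address that
              * dma_mmap_coherent() expects
              */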
4369         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4370                                 (dma_addr - HOST_PHYS_BASE), size);
4371         if (rc)
4372                 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4373
4374         return rc;
4375 }
4376
4377 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4378 {
4379         struct cpu_dyn_regs *dyn_regs =
4380                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4381         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4382         struct gaudi_device *gaudi = hdev->asic_specific;
4383         bool invalid_queue = false;
4384         int dma_id;
4385
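             /* Translate the H/W queue ID to its QMAN PQ_PI doorbell register */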
4386         switch (hw_queue_id) {
4387         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4388                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4389                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4390                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4391                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4392                 break;
4393
4394         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4395                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4396                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4397                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4398                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4399                 break;
4400
4401         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4402                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4403                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4404                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4405                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4406                 break;
4407
4408         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4409                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4410                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4411                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4412                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4413                 break;
4414
4415         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4416                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4417                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4418                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4419                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4420                 break;
4421
4422         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4423                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4424                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4425                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4426                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4427                 break;
4428
4429         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4430                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4431                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4432                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4433                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4434                 break;
4435
4436         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4437                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4438                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4439                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4440                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4441                 break;
4442
4443         case GAUDI_QUEUE_ID_CPU_PQ:
4444                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4445                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4446                 else
4447                         invalid_queue = true;
4448                 break;
4449
4450         case GAUDI_QUEUE_ID_MME_0_0:
4451                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4452                 break;
4453
4454         case GAUDI_QUEUE_ID_MME_0_1:
4455                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4456                 break;
4457
4458         case GAUDI_QUEUE_ID_MME_0_2:
4459                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4460                 break;
4461
4462         case GAUDI_QUEUE_ID_MME_0_3:
4463                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4464                 break;
4465
4466         case GAUDI_QUEUE_ID_MME_1_0:
4467                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4468                 break;
4469
4470         case GAUDI_QUEUE_ID_MME_1_1:
4471                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4472                 break;
4473
4474         case GAUDI_QUEUE_ID_MME_1_2:
4475                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4476                 break;
4477
4478         case GAUDI_QUEUE_ID_MME_1_3:
4479                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4480                 break;
4481
4482         case GAUDI_QUEUE_ID_TPC_0_0:
4483                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4484                 break;
4485
4486         case GAUDI_QUEUE_ID_TPC_0_1:
4487                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4488                 break;
4489
4490         case GAUDI_QUEUE_ID_TPC_0_2:
4491                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4492                 break;
4493
4494         case GAUDI_QUEUE_ID_TPC_0_3:
4495                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4496                 break;
4497
4498         case GAUDI_QUEUE_ID_TPC_1_0:
4499                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4500                 break;
4501
4502         case GAUDI_QUEUE_ID_TPC_1_1:
4503                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4504                 break;
4505
4506         case GAUDI_QUEUE_ID_TPC_1_2:
4507                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4508                 break;
4509
4510         case GAUDI_QUEUE_ID_TPC_1_3:
4511                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4512                 break;
4513
4514         case GAUDI_QUEUE_ID_TPC_2_0:
4515                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4516                 break;
4517
4518         case GAUDI_QUEUE_ID_TPC_2_1:
4519                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4520                 break;
4521
4522         case GAUDI_QUEUE_ID_TPC_2_2:
4523                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4524                 break;
4525
4526         case GAUDI_QUEUE_ID_TPC_2_3:
4527                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4528                 break;
4529
4530         case GAUDI_QUEUE_ID_TPC_3_0:
4531                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4532                 break;
4533
4534         case GAUDI_QUEUE_ID_TPC_3_1:
4535                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4536                 break;
4537
4538         case GAUDI_QUEUE_ID_TPC_3_2:
4539                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4540                 break;
4541
4542         case GAUDI_QUEUE_ID_TPC_3_3:
4543                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4544                 break;
4545
4546         case GAUDI_QUEUE_ID_TPC_4_0:
4547                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4548                 break;
4549
4550         case GAUDI_QUEUE_ID_TPC_4_1:
4551                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4552                 break;
4553
4554         case GAUDI_QUEUE_ID_TPC_4_2:
4555                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4556                 break;
4557
4558         case GAUDI_QUEUE_ID_TPC_4_3:
4559                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4560                 break;
4561
4562         case GAUDI_QUEUE_ID_TPC_5_0:
4563                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4564                 break;
4565
4566         case GAUDI_QUEUE_ID_TPC_5_1:
4567                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4568                 break;
4569
4570         case GAUDI_QUEUE_ID_TPC_5_2:
4571                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4572                 break;
4573
4574         case GAUDI_QUEUE_ID_TPC_5_3:
4575                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4576                 break;
4577
4578         case GAUDI_QUEUE_ID_TPC_6_0:
4579                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4580                 break;
4581
4582         case GAUDI_QUEUE_ID_TPC_6_1:
4583                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4584                 break;
4585
4586         case GAUDI_QUEUE_ID_TPC_6_2:
4587                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4588                 break;
4589
4590         case GAUDI_QUEUE_ID_TPC_6_3:
4591                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4592                 break;
4593
4594         case GAUDI_QUEUE_ID_TPC_7_0:
4595                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4596                 break;
4597
4598         case GAUDI_QUEUE_ID_TPC_7_1:
4599                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4600                 break;
4601
4602         case GAUDI_QUEUE_ID_TPC_7_2:
4603                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4604                 break;
4605
4606         case GAUDI_QUEUE_ID_TPC_7_3:
4607                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4608                 break;
4609
4610         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4611                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4612                         invalid_queue = true;
4613
4614                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4615                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4616                 break;
4617
4618         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4619                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4620                         invalid_queue = true;
4621
4622                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4623                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4624                 break;
4625
4626         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4627                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4628                         invalid_queue = true;
4629
4630                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4631                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4632                 break;
4633
4634         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4635                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4636                         invalid_queue = true;
4637
4638                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4639                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4640                 break;
4641
4642         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4643                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4644                         invalid_queue = true;
4645
4646                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4647                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4648                 break;
4649
4650         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4651                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4652                         invalid_queue = true;
4653
4654                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4655                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4656                 break;
4657
4658         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4659                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4660                         invalid_queue = true;
4661
4662                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4663                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4664                 break;
4665
4666         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4667                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4668                         invalid_queue = true;
4669
4670                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4671                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4672                 break;
4673
4674         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4675                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4676                         invalid_queue = true;
4677
4678                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4679                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4680                 break;
4681
4682         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4683                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4684                         invalid_queue = true;
4685
4686                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4687                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4688                 break;
4689
4690         default:
4691                 invalid_queue = true;
4692         }
4693
4694         if (invalid_queue) {
4695                 /* Should never get here */
4696                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4697                         hw_queue_id);
4698                 return;
4699         }
4700
4701         db_value = pi;
4702
4703         /* ring the doorbell */
4704         WREG32(db_reg_offset, db_value);
4705
4706         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4707                 /* make sure device CPU will read latest data from host */
4708                 mb();
4709
4710                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4711                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4712                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4713
4714                 WREG32(irq_handler_offset,
4715                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4716         }
4717 }
4718
4719 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4720                                 struct hl_bd *bd)
4721 {
4722         __le64 *pbd = (__le64 *) bd;
4723
4724         /* The QMANs are in host memory, so a simple copy suffices */
4725         pqe[0] = pbd[0];
4726         pqe[1] = pbd[1];
4727 }
4728
4729 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4730                                         dma_addr_t *dma_handle, gfp_t flags)
4731 {
4732         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4733                                                 dma_handle, flags);
4734
4735         /* Shift to the device's base physical address of host memory */
4736         if (kernel_addr)
4737                 *dma_handle += HOST_PHYS_BASE;
4738
4739         return kernel_addr;
4740 }
4741
4742 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4743                 void *cpu_addr, dma_addr_t dma_handle)
4744 {
4745         /* Cancel the device's base physical address of host memory */
4746         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4747
4748         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4749 }
4750
4751 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4752 {
4753         struct asic_fixed_properties *prop = &hdev->asic_prop;
4754         u64  cur_addr = DRAM_BASE_ADDR_USER;
4755         u32 val;
4756         u32 chunk_size;
4757         int rc, dma_id;
4758
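             /* Scrub the DRAM in chunks of up to 2GB: program each DMA core to
              * memset one chunk, then poll all cores until they are no longer
              * busy before moving to the next batch of chunks
              */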
4759         while (cur_addr < prop->dram_end_address) {
4760                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4761                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4762
4763                         chunk_size =
4764                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4765
4766                         dev_dbg(hdev->dev,
4767                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4768                                 cur_addr, cur_addr + chunk_size);
4769
4770                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
4771                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
4772                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4773                                                 lower_32_bits(cur_addr));
4774                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4775                                                 upper_32_bits(cur_addr));
4776                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4777                                         chunk_size);
4778                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4779                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4780                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4781
4782                         cur_addr += chunk_size;
4783
4784                         if (cur_addr == prop->dram_end_address)
4785                                 break;
4786                 }
4787
4788                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4789                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4790
4791                         rc = hl_poll_timeout(
4792                                 hdev,
4793                                 mmDMA0_CORE_STS0 + dma_offset,
4794                                 val,
4795                                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4796                                 1000,
4797                                 HBM_SCRUBBING_TIMEOUT_US);
4798
4799                         if (rc) {
4800                                 dev_err(hdev->dev,
4801                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4802                                         dma_id);
4803                                 return -EIO;
4804                         }
4805                 }
4806         }
4807
4808         return 0;
4809 }
4810
4811 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4812 {
4813         struct asic_fixed_properties *prop = &hdev->asic_prop;
4814         struct gaudi_device *gaudi = hdev->asic_specific;
4815         int rc = 0;
4816         u64 val = 0;
4817
4818         if (!hdev->memory_scrub)
4819                 return 0;
4820
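             /* addr == 0 and size == 0 means scrub everything: SRAM first and
              * then the entire HBM
              */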
4821         if (!addr && !size) {
4822                 /* Wait till device is idle */
4823                 rc = hl_poll_timeout(
4824                                 hdev,
4825                                 mmDMA0_CORE_STS0/* dummy */,
4826                                 val/* dummy */,
4827                                 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4828                                                 0, NULL)),
4829                                                 1000,
4830                                                 HBM_SCRUBBING_TIMEOUT_US);
4831                 if (rc) {
4832                         dev_err(hdev->dev, "waiting for idle timeout\n");
4833                         return -EIO;
4834                 }
4835
4836                 /* Scrub SRAM */
4837                 addr = prop->sram_user_base_address;
4838                 size = hdev->pldm ? 0x10000 :
4839                                 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4840                 val = 0x7777777777777777ull;
4841
4842                 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4843                 if (rc) {
4844                         dev_err(hdev->dev,
4845                                 "Failed to clear SRAM in mem scrub all\n");
4846                         return rc;
4847                 }
4848
4849                 mutex_lock(&gaudi->clk_gate_mutex);
4850                 hdev->asic_funcs->disable_clock_gating(hdev);
4851
4852                 /* Scrub HBM using all DMA channels in parallel */
4853                 rc = gaudi_hbm_scrubbing(hdev);
4854                 if (rc)
4855                         dev_err(hdev->dev,
4856                                 "Failed to clear HBM in mem scrub all\n");
4857
4858                 hdev->asic_funcs->set_clock_gating(hdev);
4859                 mutex_unlock(&gaudi->clk_gate_mutex);
4860         }
4861
4862         return rc;
4863 }
4864
4865 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4866                                 u32 queue_id, dma_addr_t *dma_handle,
4867                                 u16 *queue_len)
4868 {
4869         struct gaudi_device *gaudi = hdev->asic_specific;
4870         struct gaudi_internal_qman_info *q;
4871
4872         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4873                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4874                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4875                 return NULL;
4876         }
4877
4878         q = &gaudi->internal_qmans[queue_id];
4879         *dma_handle = q->pq_dma_addr;
4880         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4881
4882         return q->pq_kernel_addr;
4883 }
4884
4885 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4886                                 u16 len, u32 timeout, u64 *result)
4887 {
4888         struct gaudi_device *gaudi = hdev->asic_specific;
4889
4890         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4891                 if (result)
4892                         *result = 0;
4893                 return 0;
4894         }
4895
4896         if (!timeout)
4897                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4898
4899         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4900                                                 timeout, result);
4901 }
4902
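     /* Sanity-test an external H/W queue: send a MSG_PROT packet that writes a
      * known fence value to a host buffer, then poll that buffer until the value
      * shows up or the test times out
      */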
4903 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4904 {
4905         struct packet_msg_prot *fence_pkt;
4906         dma_addr_t pkt_dma_addr;
4907         u32 fence_val, tmp, timeout_usec;
4908         dma_addr_t fence_dma_addr;
4909         u32 *fence_ptr;
4910         int rc;
4911
4912         if (hdev->pldm)
4913                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4914         else
4915                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4916
4917         fence_val = GAUDI_QMAN0_FENCE_VAL;
4918
4919         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4920                                                         &fence_dma_addr);
4921         if (!fence_ptr) {
4922                 dev_err(hdev->dev,
4923                         "Failed to allocate memory for H/W queue %d testing\n",
4924                         hw_queue_id);
4925                 return -ENOMEM;
4926         }
4927
4928         *fence_ptr = 0;
4929
4930         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4931                                         sizeof(struct packet_msg_prot),
4932                                         GFP_KERNEL, &pkt_dma_addr);
4933         if (!fence_pkt) {
4934                 dev_err(hdev->dev,
4935                         "Failed to allocate packet for H/W queue %d testing\n",
4936                         hw_queue_id);
4937                 rc = -ENOMEM;
4938                 goto free_fence_ptr;
4939         }
4940
4941         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4942         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4943         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4944
4945         fence_pkt->ctl = cpu_to_le32(tmp);
4946         fence_pkt->value = cpu_to_le32(fence_val);
4947         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4948
4949         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4950                                         sizeof(struct packet_msg_prot),
4951                                         pkt_dma_addr);
4952         if (rc) {
4953                 dev_err(hdev->dev,
4954                         "Failed to send fence packet to H/W queue %d\n",
4955                         hw_queue_id);
4956                 goto free_pkt;
4957         }
4958
4959         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4960                                         1000, timeout_usec, true);
4961
4962         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4963
4964         if (rc == -ETIMEDOUT) {
4965                 dev_err(hdev->dev,
4966                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4967                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4968                 rc = -EIO;
4969         }
4970
4971 free_pkt:
4972         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4973                                         pkt_dma_addr);
4974 free_fence_ptr:
4975         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4976                                         fence_dma_addr);
4977         return rc;
4978 }
4979
4980 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4981 {
4982         struct gaudi_device *gaudi = hdev->asic_specific;
4983
4984         /*
4985          * Check the capability here, as send_cpu_message() won't update the
4986          * result value when the capability is missing
4987          */
4988         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4989                 return 0;
4990
4991         return hl_fw_test_cpu_queue(hdev);
4992 }
4993
4994 static int gaudi_test_queues(struct hl_device *hdev)
4995 {
4996         int i, rc, ret_val = 0;
4997
4998         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4999                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
5000                         rc = gaudi_test_queue(hdev, i);
5001                         if (rc)
5002                                 ret_val = -EINVAL;
5003                 }
5004         }
5005
5006         rc = gaudi_test_cpu_queue(hdev);
5007         if (rc)
5008                 ret_val = -EINVAL;
5009
5010         return ret_val;
5011 }
5012
5013 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
5014                 gfp_t mem_flags, dma_addr_t *dma_handle)
5015 {
5016         void *kernel_addr;
5017
5018         if (size > GAUDI_DMA_POOL_BLK_SIZE)
5019                 return NULL;
5020
5021         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
5022
5023         /* Shift to the device's base physical address of host memory */
5024         if (kernel_addr)
5025                 *dma_handle += HOST_PHYS_BASE;
5026
5027         return kernel_addr;
5028 }
5029
5030 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
5031                         dma_addr_t dma_addr)
5032 {
5033         /* Cancel the device's base physical address of host memory */
5034         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
5035
5036         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
5037 }
5038
5039 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
5040                                         size_t size, dma_addr_t *dma_handle)
5041 {
5042         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
5043 }
5044
5045 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
5046                                                 size_t size, void *vaddr)
5047 {
5048         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
5049 }
5050
5051 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
5052                         int nents, enum dma_data_direction dir)
5053 {
5054         struct scatterlist *sg;
5055         int i;
5056
5057         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
5058                 return -ENOMEM;
5059
5060         /* Shift to the device's base physical address of host memory */
5061         for_each_sg(sgl, sg, nents, i)
5062                 sg->dma_address += HOST_PHYS_BASE;
5063
5064         return 0;
5065 }
5066
5067 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
5068                         int nents, enum dma_data_direction dir)
5069 {
5070         struct scatterlist *sg;
5071         int i;
5072
5073         /* Cancel the device's base physical address of host memory */
5074         for_each_sg(sgl, sg, nents, i)
5075                 sg->dma_address -= HOST_PHYS_BASE;
5076
5077         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
5078 }
5079
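     /* Walk the SG table, merging DMA-contiguous entries as long as the combined
      * size fits in DMA_MAX_TRANSFER_SIZE, and return the CB size needed for one
      * LIN_DMA packet per resulting descriptor
      */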
5080 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
5081                                         struct sg_table *sgt)
5082 {
5083         struct scatterlist *sg, *sg_next_iter;
5084         u32 count, dma_desc_cnt;
5085         u64 len, len_next;
5086         dma_addr_t addr, addr_next;
5087
5088         dma_desc_cnt = 0;
5089
5090         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5091
5092                 len = sg_dma_len(sg);
5093                 addr = sg_dma_address(sg);
5094
5095                 if (len == 0)
5096                         break;
5097
5098                 while ((count + 1) < sgt->nents) {
5099                         sg_next_iter = sg_next(sg);
5100                         len_next = sg_dma_len(sg_next_iter);
5101                         addr_next = sg_dma_address(sg_next_iter);
5102
5103                         if (len_next == 0)
5104                                 break;
5105
5106                         if ((addr + len == addr_next) &&
5107                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5108                                 len += len_next;
5109                                 count++;
5110                                 sg = sg_next_iter;
5111                         } else {
5112                                 break;
5113                         }
5114                 }
5115
5116                 dma_desc_cnt++;
5117         }
5118
5119         return dma_desc_cnt * sizeof(struct packet_lin_dma);
5120 }
5121
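     /* Pin the user buffer referenced by this DMA packet (unless it is already
      * pinned for the job), DMA-map it, and add the resulting descriptor-list
      * size to the patched CB size
      */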
5122 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
5123                                 struct hl_cs_parser *parser,
5124                                 struct packet_lin_dma *user_dma_pkt,
5125                                 u64 addr, enum dma_data_direction dir)
5126 {
5127         struct hl_userptr *userptr;
5128         int rc;
5129
5130         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5131                         parser->job_userptr_list, &userptr))
5132                 goto already_pinned;
5133
5134         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
5135         if (!userptr)
5136                 return -ENOMEM;
5137
5138         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
5139                                 userptr);
5140         if (rc)
5141                 goto free_userptr;
5142
5143         list_add_tail(&userptr->job_node, parser->job_userptr_list);
5144
5145         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5146                                         userptr->sgt->nents, dir);
5147         if (rc) {
5148                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5149                 goto unpin_memory;
5150         }
5151
5152         userptr->dma_mapped = true;
5153         userptr->dir = dir;
5154
5155 already_pinned:
5156         parser->patched_cb_size +=
5157                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5158
5159         return 0;
5160
5161 unpin_memory:
5162         list_del(&userptr->job_node);
5163         hl_unpin_host_memory(hdev, userptr);
5164 free_userptr:
5165         kfree(userptr);
5166         return rc;
5167 }
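
/*
 * Pinning flow: reuse an already-pinned userptr for this range if the job has
 * one, otherwise pin the host memory, queue it on the job's userptr list and
 * DMA-map its SG table. Either way the patched CB size is grown by the number
 * of LIN_DMA descriptors the range will expand into, so the patched CB can be
 * allocated with the right size before patching starts.
 */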
5168
5169 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5170                                 struct hl_cs_parser *parser,
5171                                 struct packet_lin_dma *user_dma_pkt,
5172                                 bool src_in_host)
5173 {
5174         enum dma_data_direction dir;
5175         bool skip_host_mem_pin = false, user_memset;
5176         u64 addr;
5177         int rc = 0;
5178
5179         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5180                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5181                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5182
5183         if (src_in_host) {
5184                 if (user_memset)
5185                         skip_host_mem_pin = true;
5186
5187                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5188                 dir = DMA_TO_DEVICE;
5189                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5190         } else {
5191                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5192                 dir = DMA_FROM_DEVICE;
5193                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5194                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5195                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5196         }
5197
5198         if (skip_host_mem_pin)
5199                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5200         else
5201                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5202                                                 addr, dir);
5203
5204         return rc;
5205 }
5206
5207 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5208                                 struct hl_cs_parser *parser,
5209                                 struct packet_lin_dma *user_dma_pkt)
5210 {
5211         bool src_in_host = false;
5212         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5213                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5214                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5215
5216         dev_dbg(hdev->dev, "DMA packet details:\n");
5217         dev_dbg(hdev->dev, "source == 0x%llx\n",
5218                                 le64_to_cpu(user_dma_pkt->src_addr));
5219         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5220         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5221
5222         /*
5223          * Special handling for DMA with size 0. Bypass all validations
5224          * because no transactions will be done except for WR_COMP, which
5225          * is not a security issue
5226          */
5227         if (!le32_to_cpu(user_dma_pkt->tsize)) {
5228                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5229                 return 0;
5230         }
5231
5232         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5233                 src_in_host = true;
5234
5235         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5236                                                 src_in_host);
5237 }
5238
5239 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5240                                         struct hl_cs_parser *parser,
5241                                         struct packet_load_and_exe *user_pkt)
5242 {
5243         u32 cfg;
5244
5245         cfg = le32_to_cpu(user_pkt->cfg);
5246
5247         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5248                 dev_err(hdev->dev,
5249                         "User not allowed to use Load and Execute\n");
5250                 return -EPERM;
5251         }
5252
5253         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5254
5255         return 0;
5256 }
5257
5258 static int gaudi_validate_cb(struct hl_device *hdev,
5259                         struct hl_cs_parser *parser, bool is_mmu)
5260 {
5261         u32 cb_parsed_length = 0;
5262         int rc = 0;
5263
5264         parser->patched_cb_size = 0;
5265
5266         /* user_cb_size is more than 0 so the loop will always be executed */
5267         while (cb_parsed_length < parser->user_cb_size) {
5268                 enum packet_id pkt_id;
5269                 u16 pkt_size;
5270                 struct gaudi_packet *user_pkt;
5271
5272                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5273
5274                 pkt_id = (enum packet_id) (
5275                                 (le64_to_cpu(user_pkt->header) &
5276                                 PACKET_HEADER_PACKET_ID_MASK) >>
5277                                         PACKET_HEADER_PACKET_ID_SHIFT);
5278
5279                 if (!validate_packet_id(pkt_id)) {
5280                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5281                         rc = -EINVAL;
5282                         break;
5283                 }
5284
5285                 pkt_size = gaudi_packet_sizes[pkt_id];
5286                 cb_parsed_length += pkt_size;
5287                 if (cb_parsed_length > parser->user_cb_size) {
5288                         dev_err(hdev->dev,
5289                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5290                         rc = -EINVAL;
5291                         break;
5292                 }
5293
5294                 switch (pkt_id) {
5295                 case PACKET_MSG_PROT:
5296                         dev_err(hdev->dev,
5297                                 "User not allowed to use MSG_PROT\n");
5298                         rc = -EPERM;
5299                         break;
5300
5301                 case PACKET_CP_DMA:
5302                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5303                         rc = -EPERM;
5304                         break;
5305
5306                 case PACKET_STOP:
5307                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5308                         rc = -EPERM;
5309                         break;
5310
5311                 case PACKET_WREG_BULK:
5312                         dev_err(hdev->dev,
5313                                 "User not allowed to use WREG_BULK\n");
5314                         rc = -EPERM;
5315                         break;
5316
5317                 case PACKET_LOAD_AND_EXE:
5318                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5319                                 (struct packet_load_and_exe *) user_pkt);
5320                         break;
5321
5322                 case PACKET_LIN_DMA:
5323                         parser->contains_dma_pkt = true;
5324                         if (is_mmu)
5325                                 parser->patched_cb_size += pkt_size;
5326                         else
5327                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5328                                         (struct packet_lin_dma *) user_pkt);
5329                         break;
5330
5331                 case PACKET_WREG_32:
5332                 case PACKET_MSG_LONG:
5333                 case PACKET_MSG_SHORT:
5334                 case PACKET_REPEAT:
5335                 case PACKET_FENCE:
5336                 case PACKET_NOP:
5337                 case PACKET_ARB_POINT:
5338                         parser->patched_cb_size += pkt_size;
5339                         break;
5340
5341                 default:
5342                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5343                                 pkt_id);
5344                         rc = -EINVAL;
5345                         break;
5346                 }
5347
5348                 if (rc)
5349                         break;
5350         }
5351
5352         /*
5353          * The new CB should have space at the end for two MSG_PROT packets:
5354          * 1. A packet that will act as a completion packet
5355          * 2. A packet that will generate an MSI interrupt
5356          */
5357         if (parser->completion)
5358                 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5359
5360         return rc;
5361 }
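
/*
 * CB handling is a two-pass scheme: gaudi_validate_cb() rejects privileged
 * packets (MSG_PROT, CP_DMA, STOP, WREG_BULK) and accumulates the patched CB
 * size, while gaudi_patch_cb() later performs the actual copy/expansion. With
 * the MMU enabled, LIN_DMA packets are passed through unchanged, so only the
 * two trailing MSG_PROT packets are added on top of the user CB size.
 */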
5362
5363 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5364                                 struct hl_cs_parser *parser,
5365                                 struct packet_lin_dma *user_dma_pkt,
5366                                 struct packet_lin_dma *new_dma_pkt,
5367                                 u32 *new_dma_pkt_size)
5368 {
5369         struct hl_userptr *userptr;
5370         struct scatterlist *sg, *sg_next_iter;
5371         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5372         u64 len, len_next;
5373         dma_addr_t dma_addr, dma_addr_next;
5374         u64 device_memory_addr, addr;
5375         enum dma_data_direction dir;
5376         struct sg_table *sgt;
5377         bool src_in_host = false;
5378         bool skip_host_mem_pin = false;
5379         bool user_memset;
5380
5381         ctl = le32_to_cpu(user_dma_pkt->ctl);
5382
5383         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5384                 src_in_host = true;
5385
5386         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5387                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5388
5389         if (src_in_host) {
5390                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5391                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5392                 dir = DMA_TO_DEVICE;
5393                 if (user_memset)
5394                         skip_host_mem_pin = true;
5395         } else {
5396                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5397                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5398                 dir = DMA_FROM_DEVICE;
5399         }
5400
5401         if ((!skip_host_mem_pin) &&
5402                 (!hl_userptr_is_pinned(hdev, addr,
5403                                         le32_to_cpu(user_dma_pkt->tsize),
5404                                         parser->job_userptr_list, &userptr))) {
5405                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5406                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5407                 return -EFAULT;
5408         }
5409
5410         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5411                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5412                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5413                 return 0;
5414         }
5415
5416         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5417
5418         sgt = userptr->sgt;
5419         dma_desc_cnt = 0;
5420
5421         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5422                 len = sg_dma_len(sg);
5423                 dma_addr = sg_dma_address(sg);
5424
5425                 if (len == 0)
5426                         break;
5427
5428                 while ((count + 1) < sgt->nents) {
5429                         sg_next_iter = sg_next(sg);
5430                         len_next = sg_dma_len(sg_next_iter);
5431                         dma_addr_next = sg_dma_address(sg_next_iter);
5432
5433                         if (len_next == 0)
5434                                 break;
5435
5436                         if ((dma_addr + len == dma_addr_next) &&
5437                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5438                                 len += len_next;
5439                                 count++;
5440                                 sg = sg_next_iter;
5441                         } else {
5442                                 break;
5443                         }
5444                 }
5445
5446                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5447                 if (likely(dma_desc_cnt))
5448                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5449                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5450                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5451                 new_dma_pkt->tsize = cpu_to_le32(len);
5452
5453                 if (dir == DMA_TO_DEVICE) {
5454                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5455                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5456                 } else {
5457                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5458                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5459                 }
5460
5461                 if (!user_memset)
5462                         device_memory_addr += len;
5463                 dma_desc_cnt++;
5464                 new_dma_pkt++;
5465         }
5466
5467         if (!dma_desc_cnt) {
5468                 dev_err(hdev->dev,
5469                         "Error of 0 SG entries when patching DMA packet\n");
5470                 return -EFAULT;
5471         }
5472
5473         /* Fix the last DMA packet - WR_COMP must be as the user set it */
5474         new_dma_pkt--;
5475         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5476
5477         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5478
5479         return 0;
5480 }
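
/*
 * Each user LIN_DMA packet that touches host memory is expanded above into one
 * descriptor per merged SG run: the engine barrier (EB) is preserved only on
 * the first descriptor and WR_COMP is restored only on the last, so the
 * expanded sequence behaves like the single packet the user submitted.
 * Illustrative layout for a host-to-device transfer with two runs:
 *   desc0: EB as the user set it, WR_COMP off, src = run0, dst = device addr
 *   desc1: EB off, WR_COMP as the user set it, src = run1, dst = device addr + len0
 */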
5481
5482 static int gaudi_patch_cb(struct hl_device *hdev,
5483                                 struct hl_cs_parser *parser)
5484 {
5485         u32 cb_parsed_length = 0;
5486         u32 cb_patched_cur_length = 0;
5487         int rc = 0;
5488
5489         /* user_cb_size is more than 0 so the loop will always be executed */
5490         while (cb_parsed_length < parser->user_cb_size) {
5491                 enum packet_id pkt_id;
5492                 u16 pkt_size;
5493                 u32 new_pkt_size = 0;
5494                 struct gaudi_packet *user_pkt, *kernel_pkt;
5495
5496                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5497                 kernel_pkt = parser->patched_cb->kernel_address +
5498                                         cb_patched_cur_length;
5499
5500                 pkt_id = (enum packet_id) (
5501                                 (le64_to_cpu(user_pkt->header) &
5502                                 PACKET_HEADER_PACKET_ID_MASK) >>
5503                                         PACKET_HEADER_PACKET_ID_SHIFT);
5504
5505                 if (!validate_packet_id(pkt_id)) {
5506                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5507                         rc = -EINVAL;
5508                         break;
5509                 }
5510
5511                 pkt_size = gaudi_packet_sizes[pkt_id];
5512                 cb_parsed_length += pkt_size;
5513                 if (cb_parsed_length > parser->user_cb_size) {
5514                         dev_err(hdev->dev,
5515                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5516                         rc = -EINVAL;
5517                         break;
5518                 }
5519
5520                 switch (pkt_id) {
5521                 case PACKET_LIN_DMA:
5522                         rc = gaudi_patch_dma_packet(hdev, parser,
5523                                         (struct packet_lin_dma *) user_pkt,
5524                                         (struct packet_lin_dma *) kernel_pkt,
5525                                         &new_pkt_size);
5526                         cb_patched_cur_length += new_pkt_size;
5527                         break;
5528
5529                 case PACKET_MSG_PROT:
5530                         dev_err(hdev->dev,
5531                                 "User not allowed to use MSG_PROT\n");
5532                         rc = -EPERM;
5533                         break;
5534
5535                 case PACKET_CP_DMA:
5536                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5537                         rc = -EPERM;
5538                         break;
5539
5540                 case PACKET_STOP:
5541                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5542                         rc = -EPERM;
5543                         break;
5544
5545                 case PACKET_WREG_32:
5546                 case PACKET_WREG_BULK:
5547                 case PACKET_MSG_LONG:
5548                 case PACKET_MSG_SHORT:
5549                 case PACKET_REPEAT:
5550                 case PACKET_FENCE:
5551                 case PACKET_NOP:
5552                 case PACKET_ARB_POINT:
5553                 case PACKET_LOAD_AND_EXE:
5554                         memcpy(kernel_pkt, user_pkt, pkt_size);
5555                         cb_patched_cur_length += pkt_size;
5556                         break;
5557
5558                 default:
5559                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5560                                 pkt_id);
5561                         rc = -EINVAL;
5562                         break;
5563                 }
5564
5565                 if (rc)
5566                         break;
5567         }
5568
5569         return rc;
5570 }
5571
5572 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5573                 struct hl_cs_parser *parser)
5574 {
5575         u64 patched_cb_handle;
5576         u32 patched_cb_size;
5577         struct hl_cb *user_cb;
5578         int rc;
5579
5580         /*
5581          * The new CB should have space at the end for two MSG_PROT packets:
5582          * 1. A packet that will act as a completion packet
5583          * 2. A packet that will generate an MSI interrupt
5584          */
5585         if (parser->completion)
5586                 parser->patched_cb_size = parser->user_cb_size +
5587                                 sizeof(struct packet_msg_prot) * 2;
5588         else
5589                 parser->patched_cb_size = parser->user_cb_size;
5590
5591         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5592                                 parser->patched_cb_size, false, false,
5593                                 &patched_cb_handle);
5594
5595         if (rc) {
5596                 dev_err(hdev->dev,
5597                         "Failed to allocate patched CB for DMA CS %d\n",
5598                         rc);
5599                 return rc;
5600         }
5601
5602         patched_cb_handle >>= PAGE_SHIFT;
5603         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5604                                 (u32) patched_cb_handle);
5605         /* hl_cb_get should never fail */
5606         if (!parser->patched_cb) {
5607                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5608                         (u32) patched_cb_handle);
5609                 rc = -EFAULT;
5610                 goto out;
5611         }
5612
5613         /*
5614          * The check that parser->user_cb_size <= parser->user_cb->size was done
5615          * in validate_queue_index().
5616          */
5617         memcpy(parser->patched_cb->kernel_address,
5618                 parser->user_cb->kernel_address,
5619                 parser->user_cb_size);
5620
5621         patched_cb_size = parser->patched_cb_size;
5622
5623         /* Validate patched CB instead of user CB */
5624         user_cb = parser->user_cb;
5625         parser->user_cb = parser->patched_cb;
5626         rc = gaudi_validate_cb(hdev, parser, true);
5627         parser->user_cb = user_cb;
5628
5629         if (rc) {
5630                 hl_cb_put(parser->patched_cb);
5631                 goto out;
5632         }
5633
5634         if (patched_cb_size != parser->patched_cb_size) {
5635                 dev_err(hdev->dev, "user CB size mismatch\n");
5636                 hl_cb_put(parser->patched_cb);
5637                 rc = -EINVAL;
5638                 goto out;
5639         }
5640
5641 out:
5642         /*
5643          * Always call cb destroy here because we still hold one reference
5644          * to it from the earlier cb_get. After the job is completed,
5645          * cb_put will release it, but here we want to remove it from the
5646          * IDR
5647          */
5648         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5649                                         patched_cb_handle << PAGE_SHIFT);
5650
5651         return rc;
5652 }
5653
5654 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5655                 struct hl_cs_parser *parser)
5656 {
5657         u64 patched_cb_handle;
5658         int rc;
5659
5660         rc = gaudi_validate_cb(hdev, parser, false);
5661
5662         if (rc)
5663                 goto free_userptr;
5664
5665         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5666                                 parser->patched_cb_size, false, false,
5667                                 &patched_cb_handle);
5668         if (rc) {
5669                 dev_err(hdev->dev,
5670                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5671                 goto free_userptr;
5672         }
5673
5674         patched_cb_handle >>= PAGE_SHIFT;
5675         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5676                                 (u32) patched_cb_handle);
5677         /* hl_cb_get should never fail here */
5678         if (!parser->patched_cb) {
5679                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5680                                 (u32) patched_cb_handle);
5681                 rc = -EFAULT;
5682                 goto out;
5683         }
5684
5685         rc = gaudi_patch_cb(hdev, parser);
5686
5687         if (rc)
5688                 hl_cb_put(parser->patched_cb);
5689
5690 out:
5691         /*
5692          * Always call cb destroy here because we still hold one reference
5693          * to it from the earlier cb_get. After the job is completed,
5694          * cb_put will release it, but here we want to remove it from the
5695          * IDR
5696          */
5697         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5698                                 patched_cb_handle << PAGE_SHIFT);
5699
5700 free_userptr:
5701         if (rc)
5702                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5703         return rc;
5704 }
5705
5706 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5707                                         struct hl_cs_parser *parser)
5708 {
5709         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5710         struct gaudi_device *gaudi = hdev->asic_specific;
5711         u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5712                 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5713
5714         if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5715                         (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5716                         (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5717                 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5718                                 parser->hw_queue_id);
5719                 return -EINVAL;
5720         }
5721
5722         /* For internal queue jobs just check if CB address is valid */
5723         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5724                                         parser->user_cb_size,
5725                                         asic_prop->sram_user_base_address,
5726                                         asic_prop->sram_end_address))
5727                 return 0;
5728
5729         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5730                                         parser->user_cb_size,
5731                                         asic_prop->dram_user_base_address,
5732                                         asic_prop->dram_end_address))
5733                 return 0;
5734
5735         /* PMMU and HPMMU addresses are equal, check only one of them */
5736         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5737                                         parser->user_cb_size,
5738                                         asic_prop->pmmu.start_addr,
5739                                         asic_prop->pmmu.end_addr))
5740                 return 0;
5741
5742         dev_err(hdev->dev,
5743                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5744                 parser->user_cb, parser->user_cb_size);
5745
5746         return -EFAULT;
5747 }
5748
5749 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5750 {
5751         struct gaudi_device *gaudi = hdev->asic_specific;
5752
5753         if (parser->queue_type == QUEUE_TYPE_INT)
5754                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5755
5756         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5757                 return gaudi_parse_cb_mmu(hdev, parser);
5758         else
5759                 return gaudi_parse_cb_no_mmu(hdev, parser);
5760 }
5761
5762 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5763                                         void *kernel_address, u32 len,
5764                                         u64 cq_addr, u32 cq_val, u32 msi_vec,
5765                                         bool eb)
5766 {
5767         struct gaudi_device *gaudi = hdev->asic_specific;
5768         struct packet_msg_prot *cq_pkt;
5769         u32 tmp;
5770
5771         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5772
5773         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5774         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5775
5776         if (eb)
5777                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5778
5779         cq_pkt->ctl = cpu_to_le32(tmp);
5780         cq_pkt->value = cpu_to_le32(cq_val);
5781         cq_pkt->addr = cpu_to_le64(cq_addr);
5782
5783         cq_pkt++;
5784
5785         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5786         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5787         cq_pkt->ctl = cpu_to_le32(tmp);
5788         cq_pkt->value = cpu_to_le32(1);
5789
5790         if (!gaudi->multi_msi_mode)
5791                 msi_vec = 0;
5792
5793         cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5794 }
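
/*
 * These are the two MSG_PROT packets that gaudi_validate_cb() and
 * gaudi_parse_cb_mmu() reserve room for: the first writes cq_val to the
 * completion queue address, the second writes 1 to mmPCIE_MSI_INTR_0 (plus the
 * requested vector offset when multi-MSI mode is enabled) to raise the
 * interrupt.
 */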
5795
5796 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5797 {
5798         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5799 }
5800
5801 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5802                                         u32 size, u64 val)
5803 {
5804         struct packet_lin_dma *lin_dma_pkt;
5805         struct hl_cs_job *job;
5806         u32 cb_size, ctl, err_cause;
5807         struct hl_cb *cb;
5808         u64 id;
5809         int rc;
5810
5811         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5812         if (!cb)
5813                 return -EFAULT;
5814
5815         lin_dma_pkt = cb->kernel_address;
5816         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5817         cb_size = sizeof(*lin_dma_pkt);
5818
5819         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5820         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5821         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5822         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5823         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5824
5825         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5826         lin_dma_pkt->src_addr = cpu_to_le64(val);
5827         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5828         lin_dma_pkt->tsize = cpu_to_le32(size);
5829
5830         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5831         if (!job) {
5832                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5833                 rc = -ENOMEM;
5834                 goto release_cb;
5835         }
5836
5837         /* Verify DMA is OK */
5838         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5839         if (err_cause && !hdev->init_done) {
5840                 dev_dbg(hdev->dev,
5841                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5842                         err_cause);
5843                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5844         }
5845
5846         job->id = 0;
5847         job->user_cb = cb;
5848         atomic_inc(&job->user_cb->cs_cnt);
5849         job->user_cb_size = cb_size;
5850         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5851         job->patched_cb = job->user_cb;
5852         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5853
5854         hl_debugfs_add_job(hdev, job);
5855
5856         rc = gaudi_send_job_on_qman0(hdev, job);
5857         hl_debugfs_remove_job(hdev, job);
5858         kfree(job);
5859         atomic_dec(&cb->cs_cnt);
5860
5861         /* Verify DMA is OK */
5862         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5863         if (err_cause) {
5864                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5865                 rc = -EIO;
5866                 if (!hdev->init_done) {
5867                         dev_dbg(hdev->dev,
5868                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5869                                 err_cause);
5870                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5871                 }
5872         }
5873
5874 release_cb:
5875         id = cb->id;
5876         hl_cb_put(cb);
5877         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5878
5879         return rc;
5880 }
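
/*
 * Device memory is cleared with a single LIN_DMA packet in memset mode: the
 * MEMSET bit makes the engine replicate src_addr (used here as the fill value)
 * across tsize bytes at dst_addr. The packet goes through QMAN0 of DMA channel
 * 0, which the driver only uses while no compute jobs are running; see
 * gaudi_mmu_clear_pgt_range() below for a caller that zeroes the MMU page
 * tables this way.
 */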
5881
5882 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5883                                         u32 num_regs, u32 val)
5884 {
5885         struct packet_msg_long *pkt;
5886         struct hl_cs_job *job;
5887         u32 cb_size, ctl;
5888         struct hl_cb *cb;
5889         int i, rc;
5890
5891         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5892
5893         if (cb_size > SZ_2M) {
5894                 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M >> 20);
5895                 return -ENOMEM;
5896         }
5897
5898         cb = hl_cb_kernel_create(hdev, cb_size, false);
5899         if (!cb)
5900                 return -EFAULT;
5901
5902         pkt = cb->kernel_address;
5903
5904         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5905         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5906         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5907         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5908         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5909
5910         for (i = 0; i < num_regs ; i++, pkt++) {
5911                 pkt->ctl = cpu_to_le32(ctl);
5912                 pkt->value = cpu_to_le32(val);
5913                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5914         }
5915
5916         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5917         if (!job) {
5918                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5919                 rc = -ENOMEM;
5920                 goto release_cb;
5921         }
5922
5923         job->id = 0;
5924         job->user_cb = cb;
5925         atomic_inc(&job->user_cb->cs_cnt);
5926         job->user_cb_size = cb_size;
5927         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5928         job->patched_cb = job->user_cb;
5929         job->job_cb_size = cb_size;
5930
5931         hl_debugfs_add_job(hdev, job);
5932
5933         rc = gaudi_send_job_on_qman0(hdev, job);
5934         hl_debugfs_remove_job(hdev, job);
5935         kfree(job);
5936         atomic_dec(&cb->cs_cnt);
5937
5938 release_cb:
5939         hl_cb_put(cb);
5940         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5941
5942         return rc;
5943 }
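
/*
 * gaudi_memset_registers() batches the writes into a kernel CB of MSG_LONG
 * packets (one per register) and submits it as a single QMAN0 job, so the host
 * does not have to issue num_regs individual WREG32() accesses when resetting
 * the sync manager blocks below.
 */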
5944
5945 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5946 {
5947         u64 base_addr;
5948         u32 num_regs;
5949         int rc;
5950
5951         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5952         num_regs = NUM_OF_SOB_IN_BLOCK;
5953         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5954         if (rc) {
5955                 dev_err(hdev->dev, "failed resetting SM registers\n");
5956                 return rc;
5957         }
5958
5959         base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5960         num_regs = NUM_OF_SOB_IN_BLOCK;
5961         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5962         if (rc) {
5963                 dev_err(hdev->dev, "failed resetting SM registers\n");
5964                 return rc;
5965         }
5966
5967         base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5968         num_regs = NUM_OF_SOB_IN_BLOCK;
5969         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5970         if (rc) {
5971                 dev_err(hdev->dev, "failed resetting SM registers\n");
5972                 return rc;
5973         }
5974
5975         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5976         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5977         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5978         if (rc) {
5979                 dev_err(hdev->dev, "failed resetting SM registers\n");
5980                 return rc;
5981         }
5982
5983         base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5984         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5985         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5986         if (rc) {
5987                 dev_err(hdev->dev, "failed resetting SM registers\n");
5988                 return rc;
5989         }
5990
5991         base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5992         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5993         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5994         if (rc) {
5995                 dev_err(hdev->dev, "failed resetting SM registers\n");
5996                 return rc;
5997         }
5998
5999         base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6000                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
6001         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
6002         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6003         if (rc) {
6004                 dev_err(hdev->dev, "failed resetting SM registers\n");
6005                 return rc;
6006         }
6007
6008         base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
6009                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
6010         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
6011         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
6012         if (rc) {
6013                 dev_err(hdev->dev, "failed resetting SM registers\n");
6014                 return rc;
6015         }
6016
6017         return 0;
6018 }
6019
6020 static void gaudi_restore_dma_registers(struct hl_device *hdev)
6021 {
6022         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
6023                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
6024         int i;
6025
6026         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6027                 u64 sob_addr = CFG_BASE +
6028                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6029                                 (i * sob_delta);
6030                 u32 dma_offset = i * DMA_CORE_OFFSET;
6031
6032                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
6033                                 lower_32_bits(sob_addr));
6034                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
6035                                 upper_32_bits(sob_addr));
6036                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
6037
6038                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
6039                  * modified by the user for SRAM reduction
6040                  */
6041                 if (i > 1)
6042                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
6043                                                                 0x00000001);
6044         }
6045 }
6046
6047 static void gaudi_restore_qm_registers(struct hl_device *hdev)
6048 {
6049         u32 qman_offset;
6050         int i;
6051
6052         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
6053                 qman_offset = i * DMA_QMAN_OFFSET;
6054                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
6055         }
6056
6057         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
6058                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
6059                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
6060         }
6061
6062         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6063                 qman_offset = i * TPC_QMAN_OFFSET;
6064                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
6065         }
6066
6067         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
6068                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
6069                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
6070                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
6071         }
6072 }
6073
6074 static int gaudi_restore_user_registers(struct hl_device *hdev)
6075 {
6076         int rc;
6077
6078         rc = gaudi_restore_sm_registers(hdev);
6079         if (rc)
6080                 return rc;
6081
6082         gaudi_restore_dma_registers(hdev);
6083         gaudi_restore_qm_registers(hdev);
6084
6085         return 0;
6086 }
6087
6088 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6089 {
6090         return gaudi_restore_user_registers(hdev);
6091 }
6092
6093 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6094 {
6095         struct asic_fixed_properties *prop = &hdev->asic_prop;
6096         struct gaudi_device *gaudi = hdev->asic_specific;
6097         u64 addr = prop->mmu_pgt_addr;
6098         u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6099
6100         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6101                 return 0;
6102
6103         return gaudi_memset_device_memory(hdev, addr, size, 0);
6104 }
6105
6106 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6107 {
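	/* currently a no-op - nothing to restore on Gaudi */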
6108
6109 }
6110
6111 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6112                         bool user_address, u32 *val)
6113 {
6114         struct asic_fixed_properties *prop = &hdev->asic_prop;
6115         struct gaudi_device *gaudi = hdev->asic_specific;
6116         u64 hbm_bar_addr, host_phys_end;
6117         int rc = 0;
6118
6119         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6120
6121         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6122
6123                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6124                                 (hdev->clock_gating_mask &
6125                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6126
6127                         dev_err_ratelimited(hdev->dev,
6128                                 "Can't read register - clock gating is enabled!\n");
6129                         rc = -EFAULT;
6130                 } else {
6131                         *val = RREG32(addr - CFG_BASE);
6132                 }
6133
6134         } else if ((addr >= SRAM_BASE_ADDR) &&
6135                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6136                 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6137                                 (addr - SRAM_BASE_ADDR));
6138         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6139                 u64 bar_base_addr = DRAM_PHYS_BASE +
6140                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6141
6142                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6143                 if (hbm_bar_addr != U64_MAX) {
6144                         *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6145                                                 (addr - bar_base_addr));
6146
6147                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6148                                                 hbm_bar_addr);
6149                 }
6150                 if (hbm_bar_addr == U64_MAX)
6151                         rc = -EIO;
6152         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6153                         user_address && !iommu_present(&pci_bus_type)) {
6154                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6155         } else {
6156                 rc = -EFAULT;
6157         }
6158
6159         return rc;
6160 }
6161
6162 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6163                         bool user_address, u32 val)
6164 {
6165         struct asic_fixed_properties *prop = &hdev->asic_prop;
6166         struct gaudi_device *gaudi = hdev->asic_specific;
6167         u64 hbm_bar_addr, host_phys_end;
6168         int rc = 0;
6169
6170         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6171
6172         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6173
6174                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6175                                 (hdev->clock_gating_mask &
6176                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6177
6178                         dev_err_ratelimited(hdev->dev,
6179                                 "Can't write register - clock gating is enabled!\n");
6180                         rc = -EFAULT;
6181                 } else {
6182                         WREG32(addr - CFG_BASE, val);
6183                 }
6184
6185         } else if ((addr >= SRAM_BASE_ADDR) &&
6186                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6187                 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6188                                         (addr - SRAM_BASE_ADDR));
6189         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6190                 u64 bar_base_addr = DRAM_PHYS_BASE +
6191                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6192
6193                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6194                 if (hbm_bar_addr != U64_MAX) {
6195                         writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6196                                                 (addr - bar_base_addr));
6197
6198                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6199                                                 hbm_bar_addr);
6200                 }
6201                 if (hbm_bar_addr == U64_MAX)
6202                         rc = -EIO;
6203         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6204                         user_address && !iommu_present(&pci_bus_type)) {
6205                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6206         } else {
6207                 rc = -EFAULT;
6208         }
6209
6210         return rc;
6211 }
6212
6213 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6214                                 bool user_address, u64 *val)
6215 {
6216         struct asic_fixed_properties *prop = &hdev->asic_prop;
6217         struct gaudi_device *gaudi = hdev->asic_specific;
6218         u64 hbm_bar_addr, host_phys_end;
6219         int rc = 0;
6220
6221         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6222
6223         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6224
6225                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6226                                 (hdev->clock_gating_mask &
6227                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6228
6229                         dev_err_ratelimited(hdev->dev,
6230                                 "Can't read register - clock gating is enabled!\n");
6231                         rc = -EFAULT;
6232                 } else {
6233                         u32 val_l = RREG32(addr - CFG_BASE);
6234                         u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6235
6236                         *val = (((u64) val_h) << 32) | val_l;
6237                 }
6238
6239         } else if ((addr >= SRAM_BASE_ADDR) &&
6240                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6241                 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6242                                 (addr - SRAM_BASE_ADDR));
6243         } else if (addr <=
6244                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6245                 u64 bar_base_addr = DRAM_PHYS_BASE +
6246                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6247
6248                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6249                 if (hbm_bar_addr != U64_MAX) {
6250                         *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6251                                                 (addr - bar_base_addr));
6252
6253                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6254                                                 hbm_bar_addr);
6255                 }
6256                 if (hbm_bar_addr == U64_MAX)
6257                         rc = -EIO;
6258         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6259                         user_address && !iommu_present(&pci_bus_type)) {
6260                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6261         } else {
6262                 rc = -EFAULT;
6263         }
6264
6265         return rc;
6266 }
6267
6268 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6269                                 bool user_address, u64 val)
6270 {
6271         struct asic_fixed_properties *prop = &hdev->asic_prop;
6272         struct gaudi_device *gaudi = hdev->asic_specific;
6273         u64 hbm_bar_addr, host_phys_end;
6274         int rc = 0;
6275
6276         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6277
6278         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6279
6280                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6281                                 (hdev->clock_gating_mask &
6282                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6283
6284                         dev_err_ratelimited(hdev->dev,
6285                                 "Can't write register - clock gating is enabled!\n");
6286                         rc = -EFAULT;
6287                 } else {
6288                         WREG32(addr - CFG_BASE, lower_32_bits(val));
6289                         WREG32(addr + sizeof(u32) - CFG_BASE,
6290                                 upper_32_bits(val));
6291                 }
6292
6293         } else if ((addr >= SRAM_BASE_ADDR) &&
6294                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6295                 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6296                                         (addr - SRAM_BASE_ADDR));
6297         } else if (addr <=
6298                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6299                 u64 bar_base_addr = DRAM_PHYS_BASE +
6300                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6301
6302                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6303                 if (hbm_bar_addr != U64_MAX) {
6304                         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6305                                                 (addr - bar_base_addr));
6306
6307                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6308                                                 hbm_bar_addr);
6309                 }
6310                 if (hbm_bar_addr == U64_MAX)
6311                         rc = -EIO;
6312         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6313                         user_address && !iommu_present(&pci_bus_type)) {
6314                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6315         } else {
6316                 rc = -EFAULT;
6317         }
6318
6319         return rc;
6320 }
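
/*
 * The debugfs accessors above share one routing scheme: configuration space
 * goes through RREG32()/WREG32() and is refused while clock gating owns the
 * engines, SRAM is read/written directly through its PCI BAR, DRAM/HBM is
 * reached by sliding the HBM BAR window with gaudi_set_hbm_bar_base(), and
 * host physical addresses are only dereferenced via phys_to_virt() when no
 * IOMMU is present.
 */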
6321
6322 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6323                                         u32 size_to_dma, dma_addr_t dma_addr)
6324 {
6325         u32 err_cause, val;
6326         u64 dma_offset;
6327         int rc;
6328
6329         dma_offset = dma_id * DMA_CORE_OFFSET;
6330
6331         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6332         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6333         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6334         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6335         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6336         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6337                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6338
6339         rc = hl_poll_timeout(
6340                 hdev,
6341                 mmDMA0_CORE_STS0 + dma_offset,
6342                 val,
6343                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6344                 0,
6345                 1000000);
6346
6347         if (rc) {
6348                 dev_err(hdev->dev,
6349                         "DMA %d timed out while reading 0x%llx\n",
6350                         dma_id, addr);
6351                 return -EIO;
6352         }
6353
6354         /* Verify DMA is OK */
6355         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6356         if (err_cause) {
6357                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6358                 dev_dbg(hdev->dev,
6359                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6360                         err_cause);
6361                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6362
6363                 return -EIO;
6364         }
6365
6366         return 0;
6367 }
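
/*
 * gaudi_dma_core_transfer() drives a DMA core directly, without a QMAN job:
 * program SRC/DST/TSIZE, kick COMMIT in linear mode, then poll STS0 until the
 * BUSY bit clears. A non-zero ERR_CAUSE afterwards is treated as a failed
 * transfer and the cause register is cleared for the next attempt.
 */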
6368
6369 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6370                                 void *blob_addr)
6371 {
6372         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6373         struct gaudi_device *gaudi = hdev->asic_specific;
6374         u64 dma_offset, qm_offset;
6375         dma_addr_t dma_addr;
6376         void *kernel_addr;
6377         bool is_eng_idle;
6378         int rc = 0, dma_id;
6379
6380         kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6381                                                 hdev, SZ_2M,
6382                                                 &dma_addr,
6383                                                 GFP_KERNEL | __GFP_ZERO);
6384
6385         if (!kernel_addr)
6386                 return -ENOMEM;
6387
6388         mutex_lock(&gaudi->clk_gate_mutex);
6389
6390         hdev->asic_funcs->disable_clock_gating(hdev);
6391
6392         hdev->asic_funcs->hw_queues_lock(hdev);
6393
6394         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6395         dma_offset = dma_id * DMA_CORE_OFFSET;
6396         qm_offset = dma_id * DMA_QMAN_OFFSET;
6397         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6398         is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6399
6400         if (!is_eng_idle) {
6401                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6402                 dma_offset = dma_id * DMA_CORE_OFFSET;
6403                 qm_offset = dma_id * DMA_QMAN_OFFSET;
6404                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6405                 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6406
6407                 if (!is_eng_idle) {
6408                         dev_err_ratelimited(hdev->dev,
6409                                 "Can't read via DMA because it is BUSY\n");
6410                         rc = -EAGAIN;
6411                         goto out;
6412                 }
6413         }
6414
6415         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6416         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6417                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6418
6419         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6420          * using the compute ctx ASID, if one exists. If not, use the kernel ctx
6421          * ASID
6422          */
6423         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6424
6425         /* Verify DMA is OK */
6426         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6427         if (err_cause) {
6428                 dev_dbg(hdev->dev,
6429                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6430                         err_cause);
6431                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6432         }
6433
6434         pos = 0;
6435         size_left = size;
6436         size_to_dma = SZ_2M;
6437
6438         while (size_left > 0) {
6439
6440                 if (size_left < SZ_2M)
6441                         size_to_dma = size_left;
6442
6443                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6444                                                 dma_addr);
6445                 if (rc)
6446                         break;
6447
6448                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6449
6450                 if (size_left <= SZ_2M)
6451                         break;
6452
6453                 pos += SZ_2M;
6454                 addr += SZ_2M;
6455                 size_left -= SZ_2M;
6456         }
6457
6458         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6459          * using the compute ctx ASID, if one exists. If not, use the kernel ctx
6460          * ASID
6461          */
6462         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6463                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6464
6465         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6466
6467 out:
6468         hdev->asic_funcs->hw_queues_unlock(hdev);
6469
6470         hdev->asic_funcs->set_clock_gating(hdev);
6471
6472         mutex_unlock(&gaudi->clk_gate_mutex);
6473
6474         hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6475                                                 dma_addr);
6476
6477         return rc;
6478 }
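
/*
 * gaudi_debugfs_read_dma() picks whichever PCI DMA engine is idle, stops its
 * QMAN CPs, sets the DMA_CORE_PROT VAL bit (a stand-in, per the TODO above,
 * for properly mapping the bounce buffer in the MMU) and then copies the
 * requested range into a 2MB coherent bounce buffer in chunks before
 * restoring the original QMAN configuration.
 */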
6479
6480 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6481 {
6482         struct gaudi_device *gaudi = hdev->asic_specific;
6483
6484         if (hdev->hard_reset_pending)
6485                 return U64_MAX;
6486
6487         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6488                         (addr - gaudi->hbm_bar_cur_addr));
6489 }
6490
6491 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6492 {
6493         struct gaudi_device *gaudi = hdev->asic_specific;
6494
6495         if (hdev->hard_reset_pending)
6496                 return;
6497
6498         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6499                         (addr - gaudi->hbm_bar_cur_addr));
6500 }
6501
6502 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6503 {
6504         /* mask to zero the MMBP and ASID bits */
6505         WREG32_AND(reg, ~0x7FF);
6506         WREG32_OR(reg, asid);
6507 }
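
/*
 * The 0x7FF mask cleared above spans the ASID field plus the MMU-bypass
 * (MMBP) bits of the *_NON_SECURE_PROPS registers, so OR-ing in the ASID
 * leaves MMU translation enabled for the engine that owns the register.
 */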
6508
6509 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6510 {
6511         struct gaudi_device *gaudi = hdev->asic_specific;
6512
6513         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6514                 return;
6515
6516         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6517                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6518                 return;
6519         }
6520
6521         mutex_lock(&gaudi->clk_gate_mutex);
6522
6523         hdev->asic_funcs->disable_clock_gating(hdev);
6524
6525         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6526         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6527         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6528         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6529         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6530
6531         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6532         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6533         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6534         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6535         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6536
6537         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6538         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6539         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6540         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6541         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6542
6543         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6544         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6545         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6546         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6547         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6548
6549         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6550         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6551         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6552         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6553         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6554
6555         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6556         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6557         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6558         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6559         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6560
6561         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6562         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6563         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6564         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6565         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6566
6567         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6568         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6569         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6570         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6571         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6572
6573         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6574         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6575         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6576         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6577         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6578         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6579         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6580         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6581
6582         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6583         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6584         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6585         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6586         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6587         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6588         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6589
6590         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6591         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6592         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6593         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6594         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6595         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6596         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6597
6598         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6599         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6600         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6601         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6602         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6603         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6604         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6605
6606         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6607         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6608         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6609         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6610         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6611         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6612         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6613
6614         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6615         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6616         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6617         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6618         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6619         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6620         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6621
6622         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6623         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6624         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6625         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6626         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6627         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6628         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6629
6630         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6631         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6632         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6633         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6634         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6635         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6636         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6637
6638         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6639         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6640         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6641         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6642         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6643         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6644         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6645
6646         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6647         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6648         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6649         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6650         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6651         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6652         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6653         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6654         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6655         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6656
6657         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6658         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6659         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6660         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6661         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6662         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6663         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6664         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6665         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6666         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6667         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6668         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6669
6670         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6671                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6672                                 asid);
6673                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6674                                 asid);
6675                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6676                                 asid);
6677                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6678                                 asid);
6679                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6680                                 asid);
6681         }
6682
6683         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6684                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6685                                 asid);
6686                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6687                                 asid);
6688                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6689                                 asid);
6690                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6691                                 asid);
6692                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6693                                 asid);
6694         }
6695
6696         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6697                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6698                                 asid);
6699                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6700                                 asid);
6701                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6702                                 asid);
6703                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6704                                 asid);
6705                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6706                                 asid);
6707         }
6708
6709         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6710                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6711                                 asid);
6712                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6713                                 asid);
6714                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6715                                 asid);
6716                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6717                                 asid);
6718                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6719                                 asid);
6720         }
6721
6722         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6723                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6724                                 asid);
6725                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6726                                 asid);
6727                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6728                                 asid);
6729                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6730                                 asid);
6731                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6732                                 asid);
6733         }
6734
6735         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6736                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6737                                 asid);
6738                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6739                                 asid);
6740                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6741                                 asid);
6742                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6743                                 asid);
6744                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6745                                 asid);
6746         }
6747
6748         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6749                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6750                                 asid);
6751                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6752                                 asid);
6753                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6754                                 asid);
6755                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6756                                 asid);
6757                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6758                                 asid);
6759         }
6760
6761         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6762                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6763                                 asid);
6764                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6765                                 asid);
6766                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6767                                 asid);
6768                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6769                                 asid);
6770                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6771                                 asid);
6772         }
6773
6774         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6775                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6776                                 asid);
6777                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6778                                 asid);
6779                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6780                                 asid);
6781                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6782                                 asid);
6783                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6784                                 asid);
6785         }
6786
6787         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6788                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6789                                 asid);
6790                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6791                                 asid);
6792                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6793                                 asid);
6794                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6795                                 asid);
6796                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6797                                 asid);
6798         }
6799
6800         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6801         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6802
6803         hdev->asic_funcs->set_clock_gating(hdev);
6804
6805         mutex_unlock(&gaudi->clk_gate_mutex);
6806 }
6807
6808 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6809                 struct hl_cs_job *job)
6810 {
6811         struct packet_msg_prot *fence_pkt;
6812         u32 *fence_ptr;
6813         dma_addr_t fence_dma_addr;
6814         struct hl_cb *cb;
6815         u32 tmp, timeout, dma_offset;
6816         int rc;
6817
6818         if (hdev->pldm)
6819                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6820         else
6821                 timeout = HL_DEVICE_TIMEOUT_USEC;
6822
6823         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6824                 dev_err_ratelimited(hdev->dev,
6825                         "Can't send driver job on QMAN0 because the device is not idle\n");
6826                 return -EBUSY;
6827         }
6828
6829         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6830                                                         &fence_dma_addr);
6831         if (!fence_ptr) {
6832                 dev_err(hdev->dev,
6833                         "Failed to allocate fence memory for QMAN0\n");
6834                 return -ENOMEM;
6835         }
6836
6837         cb = job->patched_cb;
6838
6839         fence_pkt = cb->kernel_address +
6840                         job->job_cb_size - sizeof(struct packet_msg_prot);
6841
6842         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6843         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6844         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6845
6846         fence_pkt->ctl = cpu_to_le32(tmp);
6847         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6848         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6849
6850         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6851
6852         WREG32(mmDMA0_CORE_PROT + dma_offset,
6853                         BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6854
6855         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6856                                         job->job_cb_size, cb->bus_address);
6857         if (rc) {
6858                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6859                 goto free_fence_ptr;
6860         }
6861
6862         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6863                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6864                                 timeout, true);
6865
6866         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6867
6868         if (rc == -ETIMEDOUT) {
6869                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6870                 goto free_fence_ptr;
6871         }
6872
6873 free_fence_ptr:
6874         WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6875
6876         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6877                                         fence_dma_addr);
6878         return rc;
6879 }
6880
6881 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6882 {
6883         if (event_type >= GAUDI_EVENT_SIZE)
6884                 goto event_not_supported;
6885
6886         if (!gaudi_irq_map_table[event_type].valid)
6887                 goto event_not_supported;
6888
6889         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6890
6891         return;
6892
6893 event_not_supported:
6894         snprintf(desc, size, "N/A");
6895 }
6896
6897 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6898                                                         u32 x_y, bool is_write)
6899 {
6900         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6901
6902         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6903                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6904
6905         switch (x_y) {
6906         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6907         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6908                 dma_id[0] = 0;
6909                 dma_id[1] = 2;
6910                 break;
6911         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6912         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6913                 dma_id[0] = 1;
6914                 dma_id[1] = 3;
6915                 break;
6916         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6917         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6918                 dma_id[0] = 4;
6919                 dma_id[1] = 6;
6920                 break;
6921         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6922         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6923                 dma_id[0] = 5;
6924                 dma_id[1] = 7;
6925                 break;
6926         default:
6927                 goto unknown_initiator;
6928         }
6929
6930         for (i = 0 ; i < 2 ; i++) {
6931                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6932                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6933         }
6934
6935         switch (x_y) {
6936         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6937         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6938                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6939                         return "DMA0";
6940                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6941                         return "DMA2";
6942                 else
6943                         return "DMA0 or DMA2";
6944         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6945         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6946                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6947                         return "DMA1";
6948                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6949                         return "DMA3";
6950                 else
6951                         return "DMA1 or DMA3";
6952         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6953         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6954                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6955                         return "DMA4";
6956                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6957                         return "DMA6";
6958                 else
6959                         return "DMA4 or DMA6";
6960         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6961         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6962                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6963                         return "DMA5";
6964                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6965                         return "DMA7";
6966                 else
6967                         return "DMA5 or DMA7";
6968         }
6969
6970 unknown_initiator:
6971         return "unknown initiator";
6972 }
6973
6974 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
6975                                                         bool is_write)
6976 {
6977         u32 val, x_y, axi_id;
6978
6979         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6980                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
6981         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6982                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6983         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6984                         RAZWI_INITIATOR_AXI_ID_SHIFT);
6985
6986         switch (x_y) {
6987         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6988                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6989                         return "TPC0";
6990                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6991                         return "NIC0";
6992                 break;
6993         case RAZWI_INITIATOR_ID_X_Y_TPC1:
6994                 return "TPC1";
6995         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6996         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6997                 return "MME0";
6998         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6999         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
7000                 return "MME1";
7001         case RAZWI_INITIATOR_ID_X_Y_TPC2:
7002                 return "TPC2";
7003         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
7004                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7005                         return "TPC3";
7006                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
7007                         return "PCI";
7008                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
7009                         return "CPU";
7010                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
7011                         return "PSOC";
7012                 break;
7013         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
7014         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
7015         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
7016         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
7017         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
7018         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
7019         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
7020         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
7021                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
7022         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
7023                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7024                         return "TPC4";
7025                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7026                         return "NIC1";
7027                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7028                         return "NIC2";
7029                 break;
7030         case RAZWI_INITIATOR_ID_X_Y_TPC5:
7031                 return "TPC5";
7032         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
7033         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
7034                 return "MME2";
7035         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
7036         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
7037                 return "MME3";
7038         case RAZWI_INITIATOR_ID_X_Y_TPC6:
7039                 return "TPC6";
7040         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
7041                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
7042                         return "TPC7";
7043                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
7044                         return "NIC4";
7045                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
7046                         return "NIC5";
7047                 break;
7048         default:
7049                 break;
7050         }
7051
7052         dev_err(hdev->dev,
7053                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
7054                 val,
7055                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
7056                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
7057                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
7058                         RAZWI_INITIATOR_AXI_ID_MASK);
7059
7060         return "unknown initiator";
7061 }
7062
7063 static void gaudi_print_razwi_info(struct hl_device *hdev)
7064 {
7065         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
7066                 dev_err_ratelimited(hdev->dev,
7067                         "RAZWI event caused by illegal write of %s\n",
7068                         gaudi_get_razwi_initiator_name(hdev, true));
7069                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
7070         }
7071
7072         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
7073                 dev_err_ratelimited(hdev->dev,
7074                         "RAZWI event caused by illegal read of %s\n",
7075                         gaudi_get_razwi_initiator_name(hdev, false));
7076                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7077         }
7078 }
7079
7080 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
7081 {
7082         struct gaudi_device *gaudi = hdev->asic_specific;
7083         u64 addr;
7084         u32 val;
7085
7086         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7087                 return;
7088
7089         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7090         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7091                 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7092                 addr <<= 32;
7093                 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7094
7095                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
7096                                         addr);
7097
7098                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7099         }
7100
7101         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7102         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7103                 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7104                 addr <<= 32;
7105                 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7106
7107                 dev_err_ratelimited(hdev->dev,
7108                                 "MMU access error on va 0x%llx\n", addr);
7109
7110                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7111         }
7112 }
7113
7114 /*
7115  *  +-------------------+------------------------------------------------------+
7116  *  | Configuration Reg |                     Description                      |
7117  *  |      Address      |                                                      |
7118  *  +-------------------+------------------------------------------------------+
7119  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
7120  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
7121  *  |                   |0xF34 memory wrappers 63:32                           |
7122  *  |                   |0xF38 memory wrappers 95:64                           |
7123  *  |                   |0xF3C memory wrappers 127:96                          |
7124  *  +-------------------+------------------------------------------------------+
7125  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
7126  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
7127  *  |                   |0xF44 memory wrappers 63:32                           |
7128  *  |                   |0xF48 memory wrappers 95:64                           |
7129  *  |                   |0xF4C memory wrappers 127:96                          |
7130  *  +-------------------+------------------------------------------------------+
7131  */
7132 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7133                 struct ecc_info_extract_params *params, u64 *ecc_address,
7134                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7135 {
7136         struct gaudi_device *gaudi = hdev->asic_specific;
7137         u32 i, num_mem_regs, reg, err_bit;
7138         u64 err_addr, err_word = 0;
7139         int rc = 0;
7140
7141         num_mem_regs = params->num_memories / 32 +
7142                         ((params->num_memories % 32) ? 1 : 0);
7143
7144         if (params->block_address >= CFG_BASE)
7145                 params->block_address -= CFG_BASE;
7146
7147         if (params->derr)
7148                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7149         else
7150                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7151
7152         if (params->disable_clock_gating) {
7153                 mutex_lock(&gaudi->clk_gate_mutex);
7154                 hdev->asic_funcs->disable_clock_gating(hdev);
7155         }
7156
7157         /* Set invalid wrapper index */
7158         *memory_wrapper_idx = 0xFF;
7159
7160         /* Iterate through memory wrappers, a single bit must be set */
7161         for (i = 0 ; i < num_mem_regs ; i++) {
7162                 /* the status registers are consecutive, 4 bytes apart */
7163                 err_word = RREG32(err_addr + i * 4);
7164                 if (err_word) {
7165                         err_bit = __ffs(err_word);
7166                         *memory_wrapper_idx = err_bit + (32 * i);
7167                         break;
7168                 }
7169         }
7170
7171         if (*memory_wrapper_idx == 0xFF) {
7172                 dev_err(hdev->dev, "ECC error information cannot be found\n");
7173                 rc = -EINVAL;
7174                 goto enable_clk_gate;
7175         }
7176
7177         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7178                         *memory_wrapper_idx);
7179
7180         *ecc_address =
7181                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7182         *ecc_syndrom =
7183                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7184
7185         /* Clear error indication */
7186         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7187         if (params->derr)
7188                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7189         else
7190                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7191
7192         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7193
7194 enable_clk_gate:
7195         if (params->disable_clock_gating) {
7196                 hdev->asic_funcs->set_clock_gating(hdev);
7197
7198                 mutex_unlock(&gaudi->clk_gate_mutex);
7199         }
7200
7201         return rc;
7202 }
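/*
 * Example of the extraction math in gaudi_extract_ecc_info(): a block with
 * 90 memories needs num_mem_regs = 90 / 32 + 1 = 3 indication registers,
 * and if the first set bit found is bit 5 of the second register (i == 1),
 * the reported memory_wrapper_idx is 5 + 32 * 1 = 37.
 */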
7203
7204 /*
7205  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7206  *
7207  * @idx: the current pi/ci value
7208  * @q_len: the queue length (power of 2)
7209  *
7210  * @return the cyclically decremented index
7211  */
7212 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7213 {
7214         u32 mask = q_len - 1;
7215
7216         /*
7217          * modular decrement is equivalent to adding (q_len - 1);
7218          * we then take the LSBs to make sure the value is in the
7219          * range [0, q_len - 1]
7220          */
7221         return (idx + q_len - 1) & mask;
7222 }
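/*
 * Example: for a queue of length 8, gaudi_queue_idx_dec(5, 8) returns 4 and
 * gaudi_queue_idx_dec(0, 8) wraps around to 7, keeping the index inside
 * [0, q_len - 1] without a conditional.
 */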
7223
7224 /**
7225  * gaudi_print_sw_config_stream_data - print SW config stream data
7226  *
7227  * @hdev: pointer to the habanalabs device structure
7228  * @stream: the QMAN's stream
7229  * @qman_base: base address of QMAN registers block
7230  */
7231 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7232                                                 u64 qman_base)
7233 {
7234         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7235         u32 cq_ptr_lo_off, size;
7236
7237         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7238
7239         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7240                                                 stream * cq_ptr_lo_off;
7241         cq_ptr_hi = cq_ptr_lo +
7242                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7243         cq_tsize = cq_ptr_lo +
7244                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7245
7246         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7247         size = RREG32(cq_tsize);
7248         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
7249                                                         stream, cq_ptr, size);
7250 }
7251
7252 /**
7253  * gaudi_print_last_pqes_on_err - print last PQEs on error
7254  *
7255  * @hdev: pointer to the habanalabs device structure
7256  * @qid_base: first QID of the QMAN (out of 4 streams)
7257  * @stream: the QMAN's stream
7258  * @qman_base: base address of QMAN registers block
7259  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7260  */
7261 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7262                                                 u32 stream, u64 qman_base,
7263                                                 bool pr_sw_conf)
7264 {
7265         u32 ci, qm_ci_stream_off, queue_len;
7266         struct hl_hw_queue *q;
7267         u64 pq_ci;
7268         int i;
7269
7270         q = &hdev->kernel_queues[qid_base + stream];
7271
7272         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7273         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7274                                                 stream * qm_ci_stream_off;
7275
7276         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7277                                         q->int_queue_len : HL_QUEUE_LENGTH;
7278
7279         hdev->asic_funcs->hw_queues_lock(hdev);
7280
7281         if (pr_sw_conf)
7282                 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7283
7284         ci = RREG32(pq_ci);
7285
7286         /* we should start printing from ci - 1 */
7287         ci = gaudi_queue_idx_dec(ci, queue_len);
7288
7289         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7290                 struct hl_bd *bd;
7291                 u64 addr;
7292                 u32 len;
7293
7294                 bd = q->kernel_address;
7295                 bd += ci;
7296
7297                 len = le32_to_cpu(bd->len);
7298                 /* len 0 means uninitialized entry - break */
7299                 if (!len)
7300                         break;
7301
7302                 addr = le64_to_cpu(bd->ptr);
7303
7304                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7305                                                         stream, ci, addr, len);
7306
7307                 /* get previous ci, wrap if needed */
7308                 ci = gaudi_queue_idx_dec(ci, queue_len);
7309         }
7310
7311         hdev->asic_funcs->hw_queues_unlock(hdev);
7312 }
7313
7314 /**
7315  * print_qman_data_on_err - extract QMAN data on error
7316  *
7317  * @hdev: pointer to the habanalabs device structure
7318  * @qid_base: first QID of the QMAN (out of 4 streams)
7319  * @stream: the QMAN's stream
7320  * @qman_base: base address of QMAN registers block
7321  *
7322  * This function attempts to extract as much data as possible on a QMAN error.
7323  * On the upper CP, print the SW config stream data and the last 8 PQEs.
7324  * On the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7325  */
7326 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7327                                                 u32 stream, u64 qman_base)
7328 {
7329         u32 i;
7330
7331         if (stream != QMAN_STREAMS) {
7332                 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7333                                                                         true);
7334                 return;
7335         }
7336
7337         gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7338
7339         for (i = 0; i < QMAN_STREAMS; i++)
7340                 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7341                                                                         false);
7342 }
7343
7344 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7345                                           const char *qm_name,
7346                                           u64 qman_base,
7347                                           u32 qid_base)
7348 {
7349         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7350         u64 glbl_sts_addr, arb_err_addr;
7351         char reg_desc[32];
7352
7353         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7354         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7355
7356         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7357         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7358                 glbl_sts_clr_val = 0;
7359                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7360
7361                 if (!glbl_sts_val)
7362                         continue;
7363
7364                 if (i == QMAN_STREAMS)
7365                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7366                 else
7367                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7368
7369                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7370                         if (glbl_sts_val & BIT(j)) {
7371                                 dev_err_ratelimited(hdev->dev,
7372                                                 "%s %s. err cause: %s\n",
7373                                                 qm_name, reg_desc,
7374                                                 gaudi_qman_error_cause[j]);
7375                                 glbl_sts_clr_val |= BIT(j);
7376                         }
7377                 }
7378
7379                 /* Write 1 to clear errors */
7380                 if (!hdev->stop_on_err)
7381                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7382                 else
7383                         print_qman_data_on_err(hdev, qid_base, i, qman_base);
7384         }
7385
7386         arb_err_val = RREG32(arb_err_addr);
7387
7388         if (!arb_err_val)
7389                 return;
7390
7391         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7392                 if (arb_err_val & BIT(j)) {
7393                         dev_err_ratelimited(hdev->dev,
7394                                         "%s ARB_ERR. err cause: %s\n",
7395                                         qm_name,
7396                                         gaudi_qman_arb_error_cause[j]);
7397                 }
7398         }
7399 }
7400
7401 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7402                 struct hl_eq_sm_sei_data *sei_data)
7403 {
7404         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7405
7406         /* Flip the bits as the enum is ordered in the opposite way */
7407         index = (index ^ 0x3) & 0x3;
7408
7409         switch (sei_data->sei_cause) {
7410         case SM_SEI_SO_OVERFLOW:
7411                 dev_err_ratelimited(hdev->dev,
7412                         "%s SEI Error: SOB Group %u overflow/underflow",
7413                         gaudi_sync_manager_names[index],
7414                         le32_to_cpu(sei_data->sei_log));
7415                 break;
7416         case SM_SEI_LBW_4B_UNALIGNED:
7417                 dev_err_ratelimited(hdev->dev,
7418                         "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7419                         gaudi_sync_manager_names[index],
7420                         le32_to_cpu(sei_data->sei_log));
7421                 break;
7422         case SM_SEI_AXI_RESPONSE_ERR:
7423                 dev_err_ratelimited(hdev->dev,
7424                         "%s SEI Error: AXI ID %u response error",
7425                         gaudi_sync_manager_names[index],
7426                         le32_to_cpu(sei_data->sei_log));
7427                 break;
7428         default:
7429                 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7430                                 le32_to_cpu(sei_data->sei_log));
7431                 break;
7432         }
7433 }
7434
7435 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7436                 struct hl_eq_ecc_data *ecc_data)
7437 {
7438         struct ecc_info_extract_params params;
7439         u64 ecc_address = 0, ecc_syndrom = 0;
7440         u8 index, memory_wrapper_idx = 0;
7441         bool extract_info_from_fw;
7442         int rc;
7443
7444         switch (event_type) {
7445         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7446         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7447                 extract_info_from_fw = true;
7448                 break;
7449         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7450                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7451                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7452                 params.num_memories = 90;
7453                 params.derr = false;
7454                 params.disable_clock_gating = true;
7455                 extract_info_from_fw = false;
7456                 break;
7457         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7458                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7459                 params.block_address =
7460                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7461                 params.num_memories = 90;
7462                 params.derr = true;
7463                 params.disable_clock_gating = true;
7464                 extract_info_from_fw = false;
7465                 break;
7466         case GAUDI_EVENT_MME0_ACC_SERR:
7467         case GAUDI_EVENT_MME1_ACC_SERR:
7468         case GAUDI_EVENT_MME2_ACC_SERR:
7469         case GAUDI_EVENT_MME3_ACC_SERR:
7470                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7471                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7472                 params.num_memories = 128;
7473                 params.derr = false;
7474                 params.disable_clock_gating = true;
7475                 extract_info_from_fw = false;
7476                 break;
7477         case GAUDI_EVENT_MME0_ACC_DERR:
7478         case GAUDI_EVENT_MME1_ACC_DERR:
7479         case GAUDI_EVENT_MME2_ACC_DERR:
7480         case GAUDI_EVENT_MME3_ACC_DERR:
7481                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7482                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7483                 params.num_memories = 128;
7484                 params.derr = true;
7485                 params.disable_clock_gating = true;
7486                 extract_info_from_fw = false;
7487                 break;
7488         case GAUDI_EVENT_MME0_SBAB_SERR:
7489         case GAUDI_EVENT_MME1_SBAB_SERR:
7490         case GAUDI_EVENT_MME2_SBAB_SERR:
7491         case GAUDI_EVENT_MME3_SBAB_SERR:
7492                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7493                 params.block_address =
7494                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7495                 params.num_memories = 33;
7496                 params.derr = false;
7497                 params.disable_clock_gating = true;
7498                 extract_info_from_fw = false;
7499                 break;
7500         case GAUDI_EVENT_MME0_SBAB_DERR:
7501         case GAUDI_EVENT_MME1_SBAB_DERR:
7502         case GAUDI_EVENT_MME2_SBAB_DERR:
7503         case GAUDI_EVENT_MME3_SBAB_DERR:
7504                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7505                 params.block_address =
7506                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7507                 params.num_memories = 33;
7508                 params.derr = true;
7509                 params.disable_clock_gating = true;
7510                 extract_info_from_fw = false;
7511                 break;
7512         default:
7513                 return;
7514         }
7515
7516         if (extract_info_from_fw) {
7517                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7518                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7519                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7520         } else {
7521                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7522                                 &ecc_syndrom, &memory_wrapper_idx);
7523                 if (rc)
7524                         return;
7525         }
7526
7527         dev_err(hdev->dev,
7528                 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7529                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7530 }
7531
7532 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7533 {
7534         u64 qman_base;
7535         char desc[32];
7536         u32 qid_base;
7537         u8 index;
7538
7539         switch (event_type) {
7540         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7541                 index = event_type - GAUDI_EVENT_TPC0_QM;
7542                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7543                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7544                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7545                 break;
7546         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7547                 index = event_type - GAUDI_EVENT_MME0_QM;
7548                 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7549                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7550                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7551                 break;
7552         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7553                 index = event_type - GAUDI_EVENT_DMA0_QM;
7554                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7555                 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7556                 if (index > 1)
7557                         qid_base++;
7558                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7559                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7560                 break;
7561         case GAUDI_EVENT_NIC0_QM0:
7562                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7563                 qman_base = mmNIC0_QM0_BASE;
7564                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7565                 break;
7566         case GAUDI_EVENT_NIC0_QM1:
7567                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7568                 qman_base = mmNIC0_QM1_BASE;
7569                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7570                 break;
7571         case GAUDI_EVENT_NIC1_QM0:
7572                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7573                 qman_base = mmNIC1_QM0_BASE;
7574                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7575                 break;
7576         case GAUDI_EVENT_NIC1_QM1:
7577                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7578                 qman_base = mmNIC1_QM1_BASE;
7579                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7580                 break;
7581         case GAUDI_EVENT_NIC2_QM0:
7582                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7583                 qman_base = mmNIC2_QM0_BASE;
7584                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7585                 break;
7586         case GAUDI_EVENT_NIC2_QM1:
7587                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7588                 qman_base = mmNIC2_QM1_BASE;
7589                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7590                 break;
7591         case GAUDI_EVENT_NIC3_QM0:
7592                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7593                 qman_base = mmNIC3_QM0_BASE;
7594                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7595                 break;
7596         case GAUDI_EVENT_NIC3_QM1:
7597                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7598                 qman_base = mmNIC3_QM1_BASE;
7599                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7600                 break;
7601         case GAUDI_EVENT_NIC4_QM0:
7602                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7603                 qman_base = mmNIC4_QM0_BASE;
7604                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7605                 break;
7606         case GAUDI_EVENT_NIC4_QM1:
7607                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7608                 qman_base = mmNIC4_QM1_BASE;
7609                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7610                 break;
7611         default:
7612                 return;
7613         }
7614
7615         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7616 }
7617
7618 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7619                                         bool razwi)
7620 {
7621         char desc[64] = "";
7622
7623         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7624         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7625                 event_type, desc);
7626
7627         if (razwi) {
7628                 gaudi_print_razwi_info(hdev);
7629                 gaudi_print_mmu_error_info(hdev);
7630         }
7631 }
7632
7633 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7634                                         struct cpucp_pkt_sync_err *sync_err)
7635 {
7636         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7637
7638         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7639                         sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7640 }
7641
7642 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7643                                         struct hl_eq_fw_alive *fw_alive)
7644 {
7645         dev_err(hdev->dev,
7646                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7647                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7648                 "Minor" : "Critical", fw_alive->process_id,
7649                 fw_alive->thread_id, fw_alive->uptime_seconds);
7650 }
7651
7652 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7653 {
7654         struct gaudi_device *gaudi = hdev->asic_specific;
7655
7656         /* Unmask all IRQs since some could have been received
7657          * during the soft reset
7658          */
7659         return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7660 }
7661
7662 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7663                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7664 {
7665         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7666         int rc = 0;
7667
7668         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7669                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7670                 if (!hbm_ecc_data) {
7671                         dev_err(hdev->dev, "No FW ECC data");
7672                         return 0;
7673                 }
7674
7675                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7676                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7677                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7678                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7679                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7680                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7681                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7682                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7683                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7684                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7685                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7686                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7687                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7688                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7689
7690                 dev_err(hdev->dev,
7691                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7692                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7693                 dev_err(hdev->dev,
7694                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7695                         device, ch, hbm_ecc_data->first_addr, type,
7696                         hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7697                         hbm_ecc_data->dec_cnt);
7698                 return 0;
7699         }
7700
7701         if (hdev->asic_prop.fw_security_enabled) {
7702                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7703                 return 0;
7704         }
7705
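        /*
         * Neither FW-reported ECC data nor a security restriction applies, so
         * read the HBM memory-controller registers directly. Per channel, the
         * 0x06C/0x07C reads hold the interrupt status bits and the
         * 0x060/0x064 (resp. 0x070/0x074) reads hold the ECC counters and
         * first-error address for the even (resp. odd) pseudo-channel; the raw
         * offsets are used as-is since no symbolic names are available here.
         */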
7706         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7707         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7708                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7709                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7710                 if (val) {
7711                         rc = -EIO;
7712                         dev_err(hdev->dev,
7713                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7714                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7715                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7716                                 (val >> 4) & 0x1);
7717
7718                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7719                         dev_err(hdev->dev,
7720                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7721                                 device, ch * 2,
7722                                 RREG32(base + ch * 0x1000 + 0x064),
7723                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7724                                 (val2 & 0xFF0000) >> 16,
7725                                 (val2 & 0xFF000000) >> 24);
7726                 }
7727
7728                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7729                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7730                 if (val) {
7731                         rc = -EIO;
7732                         dev_err(hdev->dev,
7733                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7734                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7735                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7736                                 (val >> 4) & 0x1);
7737
7738                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7739                         dev_err(hdev->dev,
7740                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7741                                 device, ch * 2 + 1,
7742                                 RREG32(base + ch * 0x1000 + 0x074),
7743                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7744                                 (val2 & 0xFF0000) >> 16,
7745                                 (val2 & 0xFF000000) >> 24);
7746                 }
7747
7748                 /* Clear interrupts */
7749                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7750                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7751                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7752                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7753                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7754                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7755         }
7756
7757         val  = RREG32(base + 0x8F30);
7758         val2 = RREG32(base + 0x8F34);
7759         if (val | val2) {
7760                 rc = -EIO;
7761                 dev_err(hdev->dev,
7762                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7763                         device, val, val2);
7764         }
7765         val  = RREG32(base + 0x8F40);
7766         val2 = RREG32(base + 0x8F44);
7767         if (val | val2) {
7768                 rc = -EIO;
7769                 dev_err(hdev->dev,
7770                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7771                         device, val, val2);
7772         }
7773
7774         return rc;
7775 }
7776
7777 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7778 {
7779         switch (hbm_event_type) {
7780         case GAUDI_EVENT_HBM0_SPI_0:
7781         case GAUDI_EVENT_HBM0_SPI_1:
7782                 return 0;
7783         case GAUDI_EVENT_HBM1_SPI_0:
7784         case GAUDI_EVENT_HBM1_SPI_1:
7785                 return 1;
7786         case GAUDI_EVENT_HBM2_SPI_0:
7787         case GAUDI_EVENT_HBM2_SPI_1:
7788                 return 2;
7789         case GAUDI_EVENT_HBM3_SPI_0:
7790         case GAUDI_EVENT_HBM3_SPI_1:
7791                 return 3;
7792         default:
7793                 break;
7794         }
7795
7796         /* Should never happen */
7797         return 0;
7798 }
7799
7800 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7801                                         char *interrupt_name)
7802 {
7803         struct gaudi_device *gaudi = hdev->asic_specific;
7804         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7805         bool soft_reset_required = false;
7806
7807         /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
7808          * gating, and thus cannot be done by the CPU-CP firmware; it is
7809          * done by the driver instead.
7810          */
7811
7812         mutex_lock(&gaudi->clk_gate_mutex);
7813
7814         hdev->asic_funcs->disable_clock_gating(hdev);
7815
7816         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7817                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7818
7819         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7820                 if (tpc_interrupts_cause & BIT(i)) {
7821                         dev_err_ratelimited(hdev->dev,
7822                                         "TPC%d_%s interrupt cause: %s\n",
7823                                         tpc_id, interrupt_name,
7824                                         gaudi_tpc_interrupts_cause[i]);
7825                         /* If this is QM error, we need to soft-reset */
7826                         if (i == 15)
7827                                 soft_reset_required = true;
7828                 }
7829
7830         /* Clear interrupts */
7831         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7832
7833         hdev->asic_funcs->set_clock_gating(hdev);
7834
7835         mutex_unlock(&gaudi->clk_gate_mutex);
7836
7837         return soft_reset_required;
7838 }
7839
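/*
 * In the event map the per-TPC DEC event ids are spaced two apart and the
 * per-TPC KRN_ERR event ids six apart, so the TPC index is recovered by
 * dividing the offset from the TPC0 event accordingly (spacing inferred from
 * the arithmetic below).
 */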
7840 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7841 {
7842         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7843 }
7844
7845 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7846 {
7847         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7848 }
7849
7850 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7851                                         u16 event_type)
7852 {
7853         switch (event_type) {
7854         case GAUDI_EVENT_FIX_POWER_ENV_S:
7855                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7856                 dev_info_ratelimited(hdev->dev,
7857                         "Clock throttling due to power consumption\n");
7858                 break;
7859
7860         case GAUDI_EVENT_FIX_POWER_ENV_E:
7861                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7862                 dev_info_ratelimited(hdev->dev,
7863                         "Power envelope is safe, back to optimal clock\n");
7864                 break;
7865
7866         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7867                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7868                 dev_info_ratelimited(hdev->dev,
7869                         "Clock throttling due to overheating\n");
7870                 break;
7871
7872         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7873                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7874                 dev_info_ratelimited(hdev->dev,
7875                         "Thermal envelope is safe, back to optimal clock\n");
7876                 break;
7877
7878         default:
7879                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7880                         event_type);
7881                 break;
7882         }
7883 }
7884
7885 static void gaudi_handle_eqe(struct hl_device *hdev,
7886                                 struct hl_eq_entry *eq_entry)
7887 {
7888         struct gaudi_device *gaudi = hdev->asic_specific;
7889         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7890         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7891                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7892         bool reset_required;
7893         u8 cause;
7894         int rc;
7895
7896         if (event_type >= GAUDI_EVENT_SIZE) {
7897                 dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7898                                 event_type, GAUDI_EVENT_SIZE - 1);
7899                 return;
7900         }
7901
7902         gaudi->events_stat[event_type]++;
7903         gaudi->events_stat_aggregate[event_type]++;
7904
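        /*
         * Handling policy, as reflected by the cases below: uncorrectable
         * (DERR) ECC errors and other fatal events (GIC/AXI ECC, queue
         * out-of-sync, FW alive, device reset request) jump to reset_device,
         * which hard-resets only when hard_reset_on_fw_events is set and
         * otherwise just unmasks the IRQ; correctable (SERR) errors, QMAN
         * errors and informational events are logged and the IRQ is unmasked
         * so the FW keeps reporting them.
         */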
7905         switch (event_type) {
7906         case GAUDI_EVENT_PCIE_CORE_DERR:
7907         case GAUDI_EVENT_PCIE_IF_DERR:
7908         case GAUDI_EVENT_PCIE_PHY_DERR:
7909         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7910         case GAUDI_EVENT_MME0_ACC_DERR:
7911         case GAUDI_EVENT_MME0_SBAB_DERR:
7912         case GAUDI_EVENT_MME1_ACC_DERR:
7913         case GAUDI_EVENT_MME1_SBAB_DERR:
7914         case GAUDI_EVENT_MME2_ACC_DERR:
7915         case GAUDI_EVENT_MME2_SBAB_DERR:
7916         case GAUDI_EVENT_MME3_ACC_DERR:
7917         case GAUDI_EVENT_MME3_SBAB_DERR:
7918         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7919                 fallthrough;
7920         case GAUDI_EVENT_CPU_IF_ECC_DERR:
7921         case GAUDI_EVENT_PSOC_MEM_DERR:
7922         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7923         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7924         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7925         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7926         case GAUDI_EVENT_MMU_DERR:
7927         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7928                 gaudi_print_irq_info(hdev, event_type, true);
7929                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7930                 goto reset_device;
7931
7932         case GAUDI_EVENT_GIC500:
7933         case GAUDI_EVENT_AXI_ECC:
7934         case GAUDI_EVENT_L2_RAM_ECC:
7935         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7936                 gaudi_print_irq_info(hdev, event_type, false);
7937                 goto reset_device;
7938
7939         case GAUDI_EVENT_HBM0_SPI_0:
7940         case GAUDI_EVENT_HBM1_SPI_0:
7941         case GAUDI_EVENT_HBM2_SPI_0:
7942         case GAUDI_EVENT_HBM3_SPI_0:
7943                 gaudi_print_irq_info(hdev, event_type, false);
7944                 gaudi_hbm_read_interrupts(hdev,
7945                                 gaudi_hbm_event_to_dev(event_type),
7946                                 &eq_entry->hbm_ecc_data);
7947                 goto reset_device;
7948
7949         case GAUDI_EVENT_HBM0_SPI_1:
7950         case GAUDI_EVENT_HBM1_SPI_1:
7951         case GAUDI_EVENT_HBM2_SPI_1:
7952         case GAUDI_EVENT_HBM3_SPI_1:
7953                 gaudi_print_irq_info(hdev, event_type, false);
7954                 gaudi_hbm_read_interrupts(hdev,
7955                                 gaudi_hbm_event_to_dev(event_type),
7956                                 &eq_entry->hbm_ecc_data);
7957                 hl_fw_unmask_irq(hdev, event_type);
7958                 break;
7959
7960         case GAUDI_EVENT_TPC0_DEC:
7961         case GAUDI_EVENT_TPC1_DEC:
7962         case GAUDI_EVENT_TPC2_DEC:
7963         case GAUDI_EVENT_TPC3_DEC:
7964         case GAUDI_EVENT_TPC4_DEC:
7965         case GAUDI_EVENT_TPC5_DEC:
7966         case GAUDI_EVENT_TPC6_DEC:
7967         case GAUDI_EVENT_TPC7_DEC:
7968                 gaudi_print_irq_info(hdev, event_type, true);
7969                 reset_required = gaudi_tpc_read_interrupts(hdev,
7970                                         tpc_dec_event_to_tpc_id(event_type),
7971                                         "AXI_SLV_DEC_Error");
7972                 if (reset_required) {
7973                         dev_err(hdev->dev, "hard reset required due to %s\n",
7974                                 gaudi_irq_map_table[event_type].name);
7975
7976                         goto reset_device;
7977                 } else {
7978                         hl_fw_unmask_irq(hdev, event_type);
7979                 }
7980                 break;
7981
7982         case GAUDI_EVENT_TPC0_KRN_ERR:
7983         case GAUDI_EVENT_TPC1_KRN_ERR:
7984         case GAUDI_EVENT_TPC2_KRN_ERR:
7985         case GAUDI_EVENT_TPC3_KRN_ERR:
7986         case GAUDI_EVENT_TPC4_KRN_ERR:
7987         case GAUDI_EVENT_TPC5_KRN_ERR:
7988         case GAUDI_EVENT_TPC6_KRN_ERR:
7989         case GAUDI_EVENT_TPC7_KRN_ERR:
7990                 gaudi_print_irq_info(hdev, event_type, true);
7991                 reset_required = gaudi_tpc_read_interrupts(hdev,
7992                                         tpc_krn_event_to_tpc_id(event_type),
7993                                         "KRN_ERR");
7994                 if (reset_required) {
7995                         dev_err(hdev->dev, "hard reset required due to %s\n",
7996                                 gaudi_irq_map_table[event_type].name);
7997
7998                         goto reset_device;
7999                 } else {
8000                         hl_fw_unmask_irq(hdev, event_type);
8001                 }
8002                 break;
8003
8004         case GAUDI_EVENT_PCIE_CORE_SERR:
8005         case GAUDI_EVENT_PCIE_IF_SERR:
8006         case GAUDI_EVENT_PCIE_PHY_SERR:
8007         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
8008         case GAUDI_EVENT_MME0_ACC_SERR:
8009         case GAUDI_EVENT_MME0_SBAB_SERR:
8010         case GAUDI_EVENT_MME1_ACC_SERR:
8011         case GAUDI_EVENT_MME1_SBAB_SERR:
8012         case GAUDI_EVENT_MME2_ACC_SERR:
8013         case GAUDI_EVENT_MME2_SBAB_SERR:
8014         case GAUDI_EVENT_MME3_ACC_SERR:
8015         case GAUDI_EVENT_MME3_SBAB_SERR:
8016         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
8017         case GAUDI_EVENT_CPU_IF_ECC_SERR:
8018         case GAUDI_EVENT_PSOC_MEM_SERR:
8019         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
8020         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
8021         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
8022         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
8023                 fallthrough;
8024         case GAUDI_EVENT_MMU_SERR:
8025                 gaudi_print_irq_info(hdev, event_type, true);
8026                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
8027                 hl_fw_unmask_irq(hdev, event_type);
8028                 break;
8029
8030         case GAUDI_EVENT_PCIE_DEC:
8031         case GAUDI_EVENT_MME0_WBC_RSP:
8032         case GAUDI_EVENT_MME0_SBAB0_RSP:
8033         case GAUDI_EVENT_MME1_WBC_RSP:
8034         case GAUDI_EVENT_MME1_SBAB0_RSP:
8035         case GAUDI_EVENT_MME2_WBC_RSP:
8036         case GAUDI_EVENT_MME2_SBAB0_RSP:
8037         case GAUDI_EVENT_MME3_WBC_RSP:
8038         case GAUDI_EVENT_MME3_SBAB0_RSP:
8039         case GAUDI_EVENT_CPU_AXI_SPLITTER:
8040         case GAUDI_EVENT_PSOC_AXI_DEC:
8041         case GAUDI_EVENT_PSOC_PRSTN_FALL:
8042         case GAUDI_EVENT_MMU_PAGE_FAULT:
8043         case GAUDI_EVENT_MMU_WR_PERM:
8044         case GAUDI_EVENT_RAZWI_OR_ADC:
8045         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
8046         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
8047         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
8048                 fallthrough;
8049         case GAUDI_EVENT_NIC0_QM0:
8050         case GAUDI_EVENT_NIC0_QM1:
8051         case GAUDI_EVENT_NIC1_QM0:
8052         case GAUDI_EVENT_NIC1_QM1:
8053         case GAUDI_EVENT_NIC2_QM0:
8054         case GAUDI_EVENT_NIC2_QM1:
8055         case GAUDI_EVENT_NIC3_QM0:
8056         case GAUDI_EVENT_NIC3_QM1:
8057         case GAUDI_EVENT_NIC4_QM0:
8058         case GAUDI_EVENT_NIC4_QM1:
8059         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
8060                 gaudi_print_irq_info(hdev, event_type, true);
8061                 gaudi_handle_qman_err(hdev, event_type);
8062                 hl_fw_unmask_irq(hdev, event_type);
8063                 break;
8064
8065         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
8066                 gaudi_print_irq_info(hdev, event_type, true);
8067                 goto reset_device;
8068
8069         case GAUDI_EVENT_TPC0_BMON_SPMU:
8070         case GAUDI_EVENT_TPC1_BMON_SPMU:
8071         case GAUDI_EVENT_TPC2_BMON_SPMU:
8072         case GAUDI_EVENT_TPC3_BMON_SPMU:
8073         case GAUDI_EVENT_TPC4_BMON_SPMU:
8074         case GAUDI_EVENT_TPC5_BMON_SPMU:
8075         case GAUDI_EVENT_TPC6_BMON_SPMU:
8076         case GAUDI_EVENT_TPC7_BMON_SPMU:
8077         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
8078                 gaudi_print_irq_info(hdev, event_type, false);
8079                 hl_fw_unmask_irq(hdev, event_type);
8080                 break;
8081
8082         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
8083                 gaudi_print_irq_info(hdev, event_type, false);
8084                 gaudi_print_sm_sei_info(hdev, event_type,
8085                                         &eq_entry->sm_sei_data);
8086                 rc = hl_state_dump(hdev);
8087                 if (rc)
8088                         dev_err(hdev->dev,
8089                                 "Error during system state dump %d\n", rc);
8090                 hl_fw_unmask_irq(hdev, event_type);
8091                 break;
8092
8093         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8094                 gaudi_print_clk_change_info(hdev, event_type);
8095                 hl_fw_unmask_irq(hdev, event_type);
8096                 break;
8097
8098         case GAUDI_EVENT_PSOC_GPIO_U16_0:
8099                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8100                 dev_err(hdev->dev,
8101                         "Received high temp H/W interrupt %d (cause %d)\n",
8102                         event_type, cause);
8103                 break;
8104
8105         case GAUDI_EVENT_DEV_RESET_REQ:
8106                 gaudi_print_irq_info(hdev, event_type, false);
8107                 goto reset_device;
8108
8109         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8110                 gaudi_print_irq_info(hdev, event_type, false);
8111                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8112                 goto reset_device;
8113
8114         case GAUDI_EVENT_FW_ALIVE_S:
8115                 gaudi_print_irq_info(hdev, event_type, false);
8116                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8117                 goto reset_device;
8118
8119         default:
8120                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8121                                 event_type);
8122                 break;
8123         }
8124
8125         return;
8126
8127 reset_device:
8128         if (hdev->hard_reset_on_fw_events)
8129                 hl_device_reset(hdev, HL_RESET_HARD);
8130         else
8131                 hl_fw_unmask_irq(hdev, event_type);
8132 }
8133
8134 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8135                                         u32 *size)
8136 {
8137         struct gaudi_device *gaudi = hdev->asic_specific;
8138
8139         if (aggregate) {
8140                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8141                 return gaudi->events_stat_aggregate;
8142         }
8143
8144         *size = (u32) sizeof(gaudi->events_stat);
8145         return gaudi->events_stat;
8146 }
8147
8148 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8149                                         u32 flags)
8150 {
8151         struct gaudi_device *gaudi = hdev->asic_specific;
8152         u32 status, timeout_usec;
8153         int rc;
8154
8155         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8156                 hdev->hard_reset_pending)
8157                 return 0;
8158
8159         if (hdev->pldm)
8160                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8161         else
8162                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8163
8164         /* L0 & L1 invalidation */
8165         WREG32(mmSTLB_INV_PS, 3);
8166         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8167         WREG32(mmSTLB_INV_PS, 2);
8168
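        /*
         * Completion is detected by INV_PS reading back zero; INV_SET is then
         * cleared, and a timeout is treated as fatal (hard reset below).
         */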
8169         rc = hl_poll_timeout(
8170                 hdev,
8171                 mmSTLB_INV_PS,
8172                 status,
8173                 !status,
8174                 1000,
8175                 timeout_usec);
8176
8177         WREG32(mmSTLB_INV_SET, 0);
8178
8179         if (rc) {
8180                 dev_err_ratelimited(hdev->dev,
8181                                         "MMU cache invalidation timeout\n");
8182                 hl_device_reset(hdev, HL_RESET_HARD);
8183         }
8184
8185         return rc;
8186 }
8187
8188 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8189                                                 bool is_hard, u32 flags,
8190                                                 u32 asid, u64 va, u64 size)
8191 {
8192         /* Treat as invalidate all because there is no range invalidation
8193          * in Gaudi
8194          */
8195         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8196 }
8197
8198 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8199                                         u32 asid, u64 phys_addr)
8200 {
8201         u32 status, timeout_usec;
8202         int rc;
8203
8204         if (hdev->pldm)
8205                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8206         else
8207                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8208
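        /*
         * Program the hop-0 page-table physical address for this ASID and
         * latch it by setting the MMU busy bit; the poll below waits for
         * bit 31 of MMU_BUSY to clear before declaring the update done.
         */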
8209         WREG32(MMU_ASID, asid);
8210         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8211         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8212         WREG32(MMU_BUSY, 0x80000000);
8213
8214         rc = hl_poll_timeout(
8215                 hdev,
8216                 MMU_BUSY,
8217                 status,
8218                 !(status & 0x80000000),
8219                 1000,
8220                 timeout_usec);
8221
8222         if (rc) {
8223                 dev_err(hdev->dev,
8224                         "Timeout during MMU hop0 config of asid %d\n", asid);
8225                 return rc;
8226         }
8227
8228         return 0;
8229 }
8230
8231 static int gaudi_send_heartbeat(struct hl_device *hdev)
8232 {
8233         struct gaudi_device *gaudi = hdev->asic_specific;
8234
8235         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8236                 return 0;
8237
8238         return hl_fw_send_heartbeat(hdev);
8239 }
8240
8241 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8242 {
8243         struct gaudi_device *gaudi = hdev->asic_specific;
8244         struct asic_fixed_properties *prop = &hdev->asic_prop;
8245         int rc;
8246
8247         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8248                 return 0;
8249
8250         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8251                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8252                                         mmCPU_BOOT_ERR1);
8253         if (rc)
8254                 return rc;
8255
8256         if (!strlen(prop->cpucp_info.card_name))
8257                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8258                                 CARD_NAME_MAX_LEN);
8259
8260         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8261
8262         set_default_power_values(hdev);
8263
8264         hdev->max_power = prop->max_power_default;
8265
8266         return 0;
8267 }
8268
8269 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8270                                         u8 mask_len, struct seq_file *s)
8271 {
8272         struct gaudi_device *gaudi = hdev->asic_specific;
8273         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8274         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8275         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8276         unsigned long *mask = (unsigned long *)mask_arr;
8277         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8278         bool is_idle = true, is_eng_idle, is_slave;
8279         u64 offset;
8280         int i, dma_id, port;
8281
8282         mutex_lock(&gaudi->clk_gate_mutex);
8283
8284         hdev->asic_funcs->disable_clock_gating(hdev);
8285
8286         if (s)
8287                 seq_puts(s,
8288                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8289                         "---  -------  ------------  ----------  -------------\n");
8290
8291         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8292                 dma_id = gaudi_dma_assignment[i];
8293                 offset = dma_id * DMA_QMAN_OFFSET;
8294
8295                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8296                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8297                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8298                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8299                                 IS_DMA_IDLE(dma_core_sts0);
8300                 is_idle &= is_eng_idle;
8301
8302                 if (mask && !is_eng_idle)
8303                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8304                 if (s)
8305                         seq_printf(s, fmt, dma_id,
8306                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8307                                 qm_cgm_sts, dma_core_sts0);
8308         }
8309
8310         if (s)
8311                 seq_puts(s,
8312                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8313                         "---  -------  ------------  ----------  ----------\n");
8314
8315         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8316                 offset = i * TPC_QMAN_OFFSET;
8317                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8318                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8319                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8320                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8321                                 IS_TPC_IDLE(tpc_cfg_sts);
8322                 is_idle &= is_eng_idle;
8323
8324                 if (mask && !is_eng_idle)
8325                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8326                 if (s)
8327                         seq_printf(s, fmt, i,
8328                                 is_eng_idle ? "Y" : "N",
8329                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8330         }
8331
8332         if (s)
8333                 seq_puts(s,
8334                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8335                         "---  -------  ------------  ----------  -----------\n");
8336
8337         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8338                 offset = i * MME_QMAN_OFFSET;
8339                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8340                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8341
8342                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8343                 is_slave = i % 2;
8344                 if (!is_slave) {
8345                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8346                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8347                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8348                 }
8349
8350                 is_idle &= is_eng_idle;
8351
8352                 if (mask && !is_eng_idle)
8353                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8354                 if (s) {
8355                         if (!is_slave)
8356                                 seq_printf(s, fmt, i,
8357                                         is_eng_idle ? "Y" : "N",
8358                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8359                         else
8360                                 seq_printf(s, mme_slave_fmt, i,
8361                                         is_eng_idle ? "Y" : "N", "-",
8362                                         "-", mme_arch_sts);
8363                 }
8364         }
8365
8366         if (s)
8367                 seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8368                                 "---  -------  ------------  ----------\n");
8369
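        /*
         * Each iteration covers one NIC macro, which hosts two ports (QM0 and
         * QM1 at the same macro offset); a port is checked only if its
         * capability bit was set during init.
         */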
8370         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8371                 offset = i * NIC_MACRO_QMAN_OFFSET;
8372                 port = 2 * i;
8373                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8374                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8375                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8376                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8377                         is_idle &= is_eng_idle;
8378
8379                         if (mask && !is_eng_idle)
8380                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8381                         if (s)
8382                                 seq_printf(s, nic_fmt, port,
8383                                                 is_eng_idle ? "Y" : "N",
8384                                                 qm_glbl_sts0, qm_cgm_sts);
8385                 }
8386
8387                 port = 2 * i + 1;
8388                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8389                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8390                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8391                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8392                         is_idle &= is_eng_idle;
8393
8394                         if (mask && !is_eng_idle)
8395                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8396                         if (s)
8397                                 seq_printf(s, nic_fmt, port,
8398                                                 is_eng_idle ? "Y" : "N",
8399                                                 qm_glbl_sts0, qm_cgm_sts);
8400                 }
8401         }
8402
8403         if (s)
8404                 seq_puts(s, "\n");
8405
8406         hdev->asic_funcs->set_clock_gating(hdev);
8407
8408         mutex_unlock(&gaudi->clk_gate_mutex);
8409
8410         return is_idle;
8411 }
8412
8413 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8414         __acquires(&gaudi->hw_queues_lock)
8415 {
8416         struct gaudi_device *gaudi = hdev->asic_specific;
8417
8418         spin_lock(&gaudi->hw_queues_lock);
8419 }
8420
8421 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8422         __releases(&gaudi->hw_queues_lock)
8423 {
8424         struct gaudi_device *gaudi = hdev->asic_specific;
8425
8426         spin_unlock(&gaudi->hw_queues_lock);
8427 }
8428
8429 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8430 {
8431         return hdev->pdev->device;
8432 }
8433
8434 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8435                                 size_t max_size)
8436 {
8437         struct gaudi_device *gaudi = hdev->asic_specific;
8438
8439         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8440                 return 0;
8441
8442         return hl_fw_get_eeprom_data(hdev, data, max_size);
8443 }
8444
8445 /*
8446  * this function should be used only during initialization and/or after reset,
8447  * when there are no active users.
8448  */
8449 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8450                                 u32 tpc_id)
8451 {
8452         struct gaudi_device *gaudi = hdev->asic_specific;
8453         u64 kernel_timeout;
8454         u32 status, offset;
8455         int rc;
8456
8457         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8458
8459         if (hdev->pldm)
8460                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8461         else
8462                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8463
8464         mutex_lock(&gaudi->clk_gate_mutex);
8465
8466         hdev->asic_funcs->disable_clock_gating(hdev);
8467
8468         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8469                         lower_32_bits(tpc_kernel));
8470         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8471                         upper_32_bits(tpc_kernel));
8472
8473         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8474                         lower_32_bits(tpc_kernel));
8475         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8476                         upper_32_bits(tpc_kernel));
8477         /* set a valid LUT pointer, content is of no significance */
8478         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8479                         lower_32_bits(tpc_kernel));
8480         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8481                         upper_32_bits(tpc_kernel));
8482
8483         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8484                         lower_32_bits(CFG_BASE +
8485                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8486
8487         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8488                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8489                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8490         /* wait a bit for the icache invalidate/prefetch command to take effect */
8491         usleep_range(1000, 1500);
8492
8493         /* wait until the icache prefetch has completed (vector pipe is empty) */
8494         rc = hl_poll_timeout(
8495                 hdev,
8496                 mmTPC0_CFG_STATUS + offset,
8497                 status,
8498                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8499                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8500                 1000,
8501                 kernel_timeout);
8502
8503         if (rc) {
8504                 dev_err(hdev->dev,
8505                         "Timeout while waiting for TPC%d icache prefetch\n",
8506                         tpc_id);
8507                 hdev->asic_funcs->set_clock_gating(hdev);
8508                 mutex_unlock(&gaudi->clk_gate_mutex);
8509                 return -EIO;
8510         }
8511
8512         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8513                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8514
8515         /* wait a bit for the engine to start executing */
8516         usleep_range(1000, 1500);
8517
8518         /* wait until engine has finished executing */
8519         rc = hl_poll_timeout(
8520                 hdev,
8521                 mmTPC0_CFG_STATUS + offset,
8522                 status,
8523                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8524                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8525                 1000,
8526                 kernel_timeout);
8527
8528         if (rc) {
8529                 dev_err(hdev->dev,
8530                         "Timeout while waiting for TPC%d vector pipe\n",
8531                         tpc_id);
8532                 hdev->asic_funcs->set_clock_gating(hdev);
8533                 mutex_unlock(&gaudi->clk_gate_mutex);
8534                 return -EIO;
8535         }
8536
8537         rc = hl_poll_timeout(
8538                 hdev,
8539                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8540                 status,
8541                 (status == 0),
8542                 1000,
8543                 kernel_timeout);
8544
8545         hdev->asic_funcs->set_clock_gating(hdev);
8546         mutex_unlock(&gaudi->clk_gate_mutex);
8547
8548         if (rc) {
8549                 dev_err(hdev->dev,
8550                         "Timeout while waiting for TPC%d kernel to execute\n",
8551                         tpc_id);
8552                 return -EIO;
8553         }
8554
8555         return 0;
8556 }
8557
8558 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8559                 struct hl_ctx *ctx)
8560 {
8561         struct gaudi_device *gaudi = hdev->asic_specific;
8562         int min_alloc_order, rc, collective_cb_size;
8563
8564         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8565                 return 0;
8566
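        /*
         * Flow: allocate a coherent host buffer to back the internal CBs,
         * expose it through a gen_pool whose minimum allocation order matches
         * the collective CB size, then reserve a host VA block and map the
         * buffer contiguously so the engines can reach the pool through the
         * MMU.
         */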
8567         hdev->internal_cb_pool_virt_addr =
8568                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8569                                         HOST_SPACE_INTERNAL_CB_SZ,
8570                                         &hdev->internal_cb_pool_dma_addr,
8571                                         GFP_KERNEL | __GFP_ZERO);
8572
8573         if (!hdev->internal_cb_pool_virt_addr)
8574                 return -ENOMEM;
8575
8576         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8577                         sizeof(struct packet_fence);
8578         min_alloc_order = ilog2(collective_cb_size);
8579
8580         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8581         if (!hdev->internal_cb_pool) {
8582                 dev_err(hdev->dev,
8583                         "Failed to create internal CB pool\n");
8584                 rc = -ENOMEM;
8585                 goto free_internal_cb_pool;
8586         }
8587
8588         rc = gen_pool_add(hdev->internal_cb_pool,
8589                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8590                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8591         if (rc) {
8592                 dev_err(hdev->dev,
8593                         "Failed to add memory to internal CB pool\n");
8594                 rc = -EFAULT;
8595                 goto destroy_internal_cb_pool;
8596         }
8597
8598         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8599                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8600                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8601
8602         if (!hdev->internal_cb_va_base) {
8603                 rc = -ENOMEM;
8604                 goto destroy_internal_cb_pool;
8605         }
8606
8607         mutex_lock(&ctx->mmu_lock);
8608         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8609                         hdev->internal_cb_pool_dma_addr,
8610                         HOST_SPACE_INTERNAL_CB_SZ);
8611
8612         hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
8613         mutex_unlock(&ctx->mmu_lock);
8614
8615         if (rc)
8616                 goto unreserve_internal_cb_pool;
8617
8618         return 0;
8619
8620 unreserve_internal_cb_pool:
8621         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8622                         HOST_SPACE_INTERNAL_CB_SZ);
8623 destroy_internal_cb_pool:
8624         gen_pool_destroy(hdev->internal_cb_pool);
8625 free_internal_cb_pool:
8626         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8627                         HOST_SPACE_INTERNAL_CB_SZ,
8628                         hdev->internal_cb_pool_virt_addr,
8629                         hdev->internal_cb_pool_dma_addr);
8630
8631         return rc;
8632 }
8633
8634 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8635                 struct hl_ctx *ctx)
8636 {
8637         struct gaudi_device *gaudi = hdev->asic_specific;
8638
8639         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8640                 return;
8641
8642         mutex_lock(&ctx->mmu_lock);
8643         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8644                         HOST_SPACE_INTERNAL_CB_SZ);
8645         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8646                         HOST_SPACE_INTERNAL_CB_SZ);
8647         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
8648         mutex_unlock(&ctx->mmu_lock);
8649
8650         gen_pool_destroy(hdev->internal_cb_pool);
8651
8652         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8653                         HOST_SPACE_INTERNAL_CB_SZ,
8654                         hdev->internal_cb_pool_virt_addr,
8655                         hdev->internal_cb_pool_dma_addr);
8656 }
8657
8658 static int gaudi_ctx_init(struct hl_ctx *ctx)
8659 {
8660         if (ctx->asid == HL_KERNEL_ASID_ID)
8661                 return 0;
8662
8663         return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8664 }
8665
8666 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8667 {
8668         if (ctx->asid == HL_KERNEL_ASID_ID)
8669                 return;
8670
8671         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8672 }
8673
8674 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8675 {
8676         return gaudi_cq_assignment[cq_idx];
8677 }
8678
8679 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8680 {
8681         return sizeof(struct packet_msg_short) +
8682                         sizeof(struct packet_msg_prot) * 2;
8683 }
8684
8685 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8686 {
8687         return sizeof(struct packet_msg_short) * 4 +
8688                         sizeof(struct packet_fence) +
8689                         sizeof(struct packet_msg_prot) * 2;
8690 }
8691
8692 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8693 {
8694         return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8695 }
8696
8697 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8698                                 u32 size, bool eb)
8699 {
8700         struct hl_cb *cb = (struct hl_cb *) data;
8701         struct packet_msg_short *pkt;
8702         u32 value, ctl, pkt_size = sizeof(*pkt);
8703
8704         pkt = cb->kernel_address + size;
8705         memset(pkt, 0, pkt_size);
8706
8707         /* Inc by 1, Mode ADD */
8708         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8709         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8710
8711         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8712         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8713         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8714         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8715         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8716         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8717         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8718
8719         pkt->value = cpu_to_le32(value);
8720         pkt->ctl = cpu_to_le32(ctl);
8721
8722         return size + pkt_size;
8723 }
8724
8725 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8726                                         u16 addr)
8727 {
8728         u32 ctl, pkt_size = sizeof(*pkt);
8729
8730         memset(pkt, 0, pkt_size);
8731
8732         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8733         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8734         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8735         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8736         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8737         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8738
8739         pkt->value = cpu_to_le32(value);
8740         pkt->ctl = cpu_to_le32(ctl);
8741
8742         return pkt_size;
8743 }
8744
8745 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8746                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8747                 u16 sob_val, u16 mon_id)
8748 {
8749         u64 monitor_base;
8750         u32 ctl, value, pkt_size = sizeof(*pkt);
8751         u16 msg_addr_offset;
8752         u8 mask;
8753
8754         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8755                 dev_err(hdev->dev,
8756                         "sob_base %u (mask %#x) is not valid\n",
8757                         sob_base, sob_mask);
8758                 return 0;
8759         }
8760
8761         /*
8762          * monitor_base should be the content of the base0 address registers,
8763          * so it will be added to the msg short offsets
8764          */
8765         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8766
8767         msg_addr_offset =
8768                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8769                                 monitor_base;
8770
8771         memset(pkt, 0, pkt_size);
8772
8773         /* Monitor config packet: bind the monitor to a sync object */
8774         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8775         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8776         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8777                         0); /* GREATER OR EQUAL */
8778         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8779
8780         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8781         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8782         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8783         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8784         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8785         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8786         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8787
8788         pkt->value = cpu_to_le32(value);
8789         pkt->ctl = cpu_to_le32(ctl);
8790
8791         return pkt_size;
8792 }
8793
8794 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8795 {
8796         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8797
8798         memset(pkt, 0, pkt_size);
8799
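        /*
         * Fence configuration: wait on fence counter #2 until it reaches a
         * target value of 1, then consume it (decrement by 1). The value is
         * expected to be written by the monitor payload built in
         * gaudi_add_mon_pkts() (an inference from the wait-CB flow).
         */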
8800         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8801         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8802         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8803
8804         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8805         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8806         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8807         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8808
8809         pkt->cfg = cpu_to_le32(cfg);
8810         pkt->ctl = cpu_to_le32(ctl);
8811
8812         return pkt_size;
8813 }
8814
8815 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8816 {
8817         u32 offset, nic_index;
8818
8819         switch (queue_id) {
8820         case GAUDI_QUEUE_ID_DMA_0_0:
8821                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8822                 break;
8823         case GAUDI_QUEUE_ID_DMA_0_1:
8824                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8825                 break;
8826         case GAUDI_QUEUE_ID_DMA_0_2:
8827                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8828                 break;
8829         case GAUDI_QUEUE_ID_DMA_0_3:
8830                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8831                 break;
8832         case GAUDI_QUEUE_ID_DMA_1_0:
8833                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8834                 break;
8835         case GAUDI_QUEUE_ID_DMA_1_1:
8836                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8837                 break;
8838         case GAUDI_QUEUE_ID_DMA_1_2:
8839                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8840                 break;
8841         case GAUDI_QUEUE_ID_DMA_1_3:
8842                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8843                 break;
8844         case GAUDI_QUEUE_ID_DMA_5_0:
8845                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8846                 break;
8847         case GAUDI_QUEUE_ID_DMA_5_1:
8848                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8849                 break;
8850         case GAUDI_QUEUE_ID_DMA_5_2:
8851                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8852                 break;
8853         case GAUDI_QUEUE_ID_DMA_5_3:
8854                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8855                 break;
8856         case GAUDI_QUEUE_ID_TPC_7_0:
8857                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8858                 break;
8859         case GAUDI_QUEUE_ID_TPC_7_1:
8860                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8861                 break;
8862         case GAUDI_QUEUE_ID_TPC_7_2:
8863                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8864                 break;
8865         case GAUDI_QUEUE_ID_TPC_7_3:
8866                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8867                 break;
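        /*
         * NIC queues: four PQs per port, so the port index is the queue-id
         * offset divided by four; it is then split into the macro index (two
         * ports per macro) and the engine within the macro to reach the
         * matching QM FENCE2 register.
         */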
8868         case GAUDI_QUEUE_ID_NIC_0_0:
8869         case GAUDI_QUEUE_ID_NIC_1_0:
8870         case GAUDI_QUEUE_ID_NIC_2_0:
8871         case GAUDI_QUEUE_ID_NIC_3_0:
8872         case GAUDI_QUEUE_ID_NIC_4_0:
8873         case GAUDI_QUEUE_ID_NIC_5_0:
8874         case GAUDI_QUEUE_ID_NIC_6_0:
8875         case GAUDI_QUEUE_ID_NIC_7_0:
8876         case GAUDI_QUEUE_ID_NIC_8_0:
8877         case GAUDI_QUEUE_ID_NIC_9_0:
8878                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8879                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8880                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8881                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8882                 break;
8883         case GAUDI_QUEUE_ID_NIC_0_1:
8884         case GAUDI_QUEUE_ID_NIC_1_1:
8885         case GAUDI_QUEUE_ID_NIC_2_1:
8886         case GAUDI_QUEUE_ID_NIC_3_1:
8887         case GAUDI_QUEUE_ID_NIC_4_1:
8888         case GAUDI_QUEUE_ID_NIC_5_1:
8889         case GAUDI_QUEUE_ID_NIC_6_1:
8890         case GAUDI_QUEUE_ID_NIC_7_1:
8891         case GAUDI_QUEUE_ID_NIC_8_1:
8892         case GAUDI_QUEUE_ID_NIC_9_1:
8893                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8894                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8895                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8896                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8897                 break;
8898         case GAUDI_QUEUE_ID_NIC_0_2:
8899         case GAUDI_QUEUE_ID_NIC_1_2:
8900         case GAUDI_QUEUE_ID_NIC_2_2:
8901         case GAUDI_QUEUE_ID_NIC_3_2:
8902         case GAUDI_QUEUE_ID_NIC_4_2:
8903         case GAUDI_QUEUE_ID_NIC_5_2:
8904         case GAUDI_QUEUE_ID_NIC_6_2:
8905         case GAUDI_QUEUE_ID_NIC_7_2:
8906         case GAUDI_QUEUE_ID_NIC_8_2:
8907         case GAUDI_QUEUE_ID_NIC_9_2:
8908                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8909                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8910                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8911                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8912                 break;
8913         case GAUDI_QUEUE_ID_NIC_0_3:
8914         case GAUDI_QUEUE_ID_NIC_1_3:
8915         case GAUDI_QUEUE_ID_NIC_2_3:
8916         case GAUDI_QUEUE_ID_NIC_3_3:
8917         case GAUDI_QUEUE_ID_NIC_4_3:
8918         case GAUDI_QUEUE_ID_NIC_5_3:
8919         case GAUDI_QUEUE_ID_NIC_6_3:
8920         case GAUDI_QUEUE_ID_NIC_7_3:
8921         case GAUDI_QUEUE_ID_NIC_8_3:
8922         case GAUDI_QUEUE_ID_NIC_9_3:
8923                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8924                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8925                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8926                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8927                 break;
8928         default:
8929                 return -EINVAL;
8930         }
8931
8932         *addr = CFG_BASE + offset;
8933
8934         return 0;
8935 }
8936
8937 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8938 {
8939         u64 monitor_base;
8940         u32 size = 0;
8941         u16 msg_addr_offset;
8942
8943         /*
8944          * monitor_base should be the content of the base0 address registers,
8945          * so it will be added to the msg short offsets
8946          */
8947         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8948
8949         /* First monitor config packet: low address of the sync */
8950         msg_addr_offset =
8951                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8952                                 monitor_base;
8953
8954         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8955                                         msg_addr_offset);
8956
8957         /* Second monitor config packet: high address of the sync */
8958         msg_addr_offset =
8959                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8960                                 monitor_base;
8961
8962         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8963                                         msg_addr_offset);
8964
8965         /*
8966          * Third monitor config packet: the payload, i.e. what to write when the
8967          * sync triggers
8968          */
8969         msg_addr_offset =
8970                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8971                                 monitor_base;
8972
8973         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8974
8975         return size;
8976 }
8977
8978 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8979                                 struct hl_gen_wait_properties *prop)
8980 {
8981         struct hl_cb *cb = (struct hl_cb *) prop->data;
8982         void *buf = cb->kernel_address;
8983         u64 fence_addr = 0;
8984         u32 size = prop->size;
8985
8986         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8987                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8988                                 prop->q_idx);
8989                 return 0;
8990         }
8991
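        /*
         * Wait CB layout: three MSG_SHORT packets program the monitor payload
         * (fence address low/high plus the data to write), a fourth arms the
         * monitor on the requested SOB group/value, and a closing FENCE packet
         * stalls the queue until that payload lands.
         */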
8992         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8993         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8994                         prop->sob_mask, prop->sob_val, prop->mon_id);
8995         size += gaudi_add_fence_pkt(buf + size);
8996
8997         return size;
8998 }
8999
9000 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
9001 {
9002         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
9003
9004         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
9005                 hw_sob->sob_id);
9006
9007         WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
9008                         hw_sob->sob_id * 4, 0);
9009
9010         kref_init(&hw_sob->kref);
9011 }
9012
9013 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
9014 {
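        /*
         * The boot FW leaves HL_POWER9_HOST_MAGIC in a register that survives
         * reset when the host is a POWER9 machine; in that case 64-bit DMA
         * addressing is enabled, otherwise the mask is restricted to 48 bits.
         */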
9015         if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
9016                                                         HL_POWER9_HOST_MAGIC) {
9017                 hdev->power9_64bit_dma_enable = 1;
9018                 hdev->dma_mask = 64;
9019         } else {
9020                 hdev->power9_64bit_dma_enable = 0;
9021                 hdev->dma_mask = 48;
9022         }
9023 }
9024
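/*
 * Compose a 64-bit device timestamp from the upper and lower 32-bit
 * timestamp counter registers
 */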
9025 static u64 gaudi_get_device_time(struct hl_device *hdev)
9026 {
9027         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
9028
9029         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
9030 }
9031
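/* User-space mapping of HW blocks is not supported on GAUDI */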
9032 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
9033                                 u32 *block_size, u32 *block_id)
9034 {
9035         return -EPERM;
9036 }
9037
9038 static int gaudi_block_mmap(struct hl_device *hdev,
9039                                 struct vm_area_struct *vma,
9040                                 u32 block_id, u32 block_size)
9041 {
9042         return -EPERM;
9043 }
9044
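/*
 * Notify the device CPU that the driver is ready to receive events, by
 * raising the INTS_REGISTER interrupt. The interrupt register address is
 * either the hard-coded GIC one or the one published by the firmware in
 * the dynamic registers descriptor.
 */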
9045 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
9046 {
9047         struct cpu_dyn_regs *dyn_regs =
9048                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
9049         u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
9050                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
9051                         le32_to_cpu(dyn_regs->gic_host_ints_irq);
9052
9053         WREG32(irq_handler_offset,
9054                 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
9055 }
9056
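/*
 * Translate a PLL index from the user ABI enumeration to the firmware's
 * PLL enumeration. Returns -EINVAL for an unknown index.
 */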
9057 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
9058 {
9059         switch (pll_idx) {
9060         case HL_GAUDI_CPU_PLL: return CPU_PLL;
9061         case HL_GAUDI_PCI_PLL: return PCI_PLL;
9062         case HL_GAUDI_NIC_PLL: return NIC_PLL;
9063         case HL_GAUDI_DMA_PLL: return DMA_PLL;
9064         case HL_GAUDI_MESH_PLL: return MESH_PLL;
9065         case HL_GAUDI_MME_PLL: return MME_PLL;
9066         case HL_GAUDI_TPC_PLL: return TPC_PLL;
9067         case HL_GAUDI_IF_PLL: return IF_PLL;
9068         case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
9069         case HL_GAUDI_HBM_PLL: return HBM_PLL;
9070         default: return -EINVAL;
9071         }
9072 }
9073
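/*
 * Add a single sync-object-to-engine entry to the map. A register value
 * of 0 or 0xffffffff carries no sync information and is silently skipped.
 */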
9074 static int gaudi_add_sync_to_engine_map_entry(
9075         struct hl_sync_to_engine_map *map, u32 reg_value,
9076         enum hl_sync_engine_type engine_type, u32 engine_id)
9077 {
9078         struct hl_sync_to_engine_map_entry *entry;
9079
9080         /* The register value holds a partial address of the sync object and
9081          * is used as its unique identifier, so the CFG base must be
9082          * subtracted from it first.
9083          */
9084         if (reg_value == 0 || reg_value == 0xffffffff)
9085                 return 0;
9086         reg_value -= (u32)CFG_BASE;
9087
9088         /* create a new hash entry */
9089         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
9090         if (!entry)
9091                 return -ENOMEM;
9092         entry->engine_type = engine_type;
9093         entry->engine_id = engine_id;
9094         entry->sync_id = reg_value;
9095         hash_add(map->tb, &entry->node, reg_value);
9096
9097         return 0;
9098 }
9099
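/*
 * Build the sync-object-to-engine map used by the state dump: read the
 * sync object configuration register of every TPC, MME and DMA engine
 * (with clock gating disabled around the TPC/MME accesses) and record
 * which engine signals which sync object.
 */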
9100 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
9101                                 struct hl_sync_to_engine_map *map)
9102 {
9103         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9104         struct gaudi_device *gaudi = hdev->asic_specific;
9105         int i, j, rc;
9106         u32 reg_value;
9107
9108         /* Iterate over TPC engines */
9109         for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
9110                 /* TPC registers must be accessed with clock gating disabled */
9111                 mutex_lock(&gaudi->clk_gate_mutex);
9112                 hdev->asic_funcs->disable_clock_gating(hdev);
9113
9114                 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
9115                                         sds->props[SP_NEXT_TPC] * i);
9116
9117                 /* We can re-enable clock gating now */
9118                 hdev->asic_funcs->set_clock_gating(hdev);
9119                 mutex_unlock(&gaudi->clk_gate_mutex);
9120
9121                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9122                                                         ENGINE_TPC, i);
9123                 if (rc)
9124                         goto free_sync_to_engine_map;
9125         }
9126
9127         /* Iterate over MME engines */
9128         for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
9129                 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
9130                         /* MME registers must be accessed with clock gating
9131                          * disabled
9132                          */
9133                         mutex_lock(&gaudi->clk_gate_mutex);
9134                         hdev->asic_funcs->disable_clock_gating(hdev);
9135
9136                         reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
9137                                                 sds->props[SP_NEXT_MME] * i +
9138                                                 j * sizeof(u32));
9139
9140                         /* We can re-enable clock gating now */
9141                         hdev->asic_funcs->set_clock_gating(hdev);
9142                         mutex_unlock(&gaudi->clk_gate_mutex);
9143
9144                         rc = gaudi_add_sync_to_engine_map_entry(
9145                                 map, reg_value, ENGINE_MME,
9146                                 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
9147                         if (rc)
9148                                 goto free_sync_to_engine_map;
9149                 }
9150         }
9151
9152         /* Iterate over DMA engines */
9153         for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
9154                 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
9155                                         sds->props[SP_DMA_QUEUES_OFFSET] * i);
9156                 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
9157                                                         ENGINE_DMA, i);
9158                 if (rc)
9159                         goto free_sync_to_engine_map;
9160         }
9161
9162         return 0;
9163
9164 free_sync_to_engine_map:
9165         hl_state_dump_free_sync_to_engine_map(map);
9166
9167         return rc;
9168 }
9169
9170 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
9171 {
9172         return FIELD_GET(
9173                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
9174                 mon->status);
9175 }
9176
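/*
 * Append a one-line description of an armed monitor to the state-dump
 * buffer: the sync group id, mask and target value it waits for, the
 * payload value and address it will write, and its pending status.
 */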
9177 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
9178                                 struct hl_device *hdev,
9179                                 struct hl_mon_state_dump *mon)
9180 {
9181         const char *name;
9182         char scratch_buf1[BIN_REG_STRING_SIZE],
9183                 scratch_buf2[BIN_REG_STRING_SIZE];
9184
9185         name = hl_state_dump_get_monitor_name(hdev, mon);
9186         if (!name)
9187                 name = "";
9188
9189         return hl_snprintf_resize(
9190                 buf, size, offset,
9191                 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s",
9192                 mon->id, name,
9193                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
9194                                 mon->arm_data),
9195                 hl_format_as_binary(
9196                         scratch_buf1, sizeof(scratch_buf1),
9197                         FIELD_GET(
9198                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
9199                                 mon->arm_data)),
9200                 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
9201                                 mon->arm_data),
9202                 mon->wr_data,
9203                 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
9204                 hl_format_as_binary(
9205                         scratch_buf2, sizeof(scratch_buf2),
9206                         FIELD_GET(
9207                                 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
9208                                 mon->status)));
9209 }
9210
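/*
 * Dump the fence state of a single engine QMAN: read the CP status and
 * fence counter registers of all its streams and, for every stream with a
 * fence in progress, print the fence id, the counter and rdata register
 * addresses and the current fence value.
 */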
9212 static int gaudi_print_fences_single_engine(
9213         struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9214         enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9215         size_t *size, size_t *offset)
9216 {
9217         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9218         int rc = -ENOMEM, i;
9219         u32 *statuses, *fences;
9220
9221         statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
9222                         sizeof(*statuses), GFP_KERNEL);
9223         if (!statuses)
9224                 goto out;
9225
9226         fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
9227                                 sds->props[SP_ENGINE_NUM_OF_QUEUES],
9228                          sizeof(*fences), GFP_KERNEL);
9229         if (!fences)
9230                 goto free_status;
9231
9232         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9233                 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9234
9235         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9236                                 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9237                 fences[i] = RREG32(base_offset + i * sizeof(u32));
9238
9239         /* The actual print */
9240         for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9241                 u32 fence_id;
9242                 u64 fence_cnt, fence_rdata;
9243                 const char *engine_name;
9244
9245                 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9246                         statuses[i]))
9247                         continue;
9248
9249                 fence_id =
9250                         FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9251                 fence_cnt = base_offset + CFG_BASE +
9252                         sizeof(u32) *
9253                         (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9254                 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9255                                 sds->props[SP_FENCE0_RDATA_OFFSET];
9256                 engine_name = hl_sync_engine_to_string(engine_type);
9257
9258                 rc = hl_snprintf_resize(
9259                         buf, size, offset,
9260                         "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9261                         engine_name, engine_id,
9262                         i, fence_id,
9263                         fence_cnt, engine_name, engine_id, fence_id, i,
9264                         fence_rdata, engine_name, engine_id, fence_id, i,
9265                         fences[fence_id],
9266                         statuses[i]);
9267                 if (rc)
9268                         goto free_fences;
9269         }
9270
9271         rc = 0;
9272
9273 free_fences:
9274         kfree(fences);
9275 free_status:
9276         kfree(statuses);
9277 out:
9278         return rc;
9279 }
9280
9282 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9283         .monitor_valid = gaudi_monitor_valid,
9284         .print_single_monitor = gaudi_print_single_monitor,
9285         .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9286         .print_fences_single_engine = gaudi_print_fences_single_engine,
9287 };
9288
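/*
 * Populate the state-dump hash tables that translate sync object and
 * monitor ids into human-readable names, and register the GAUDI-specific
 * properties and callbacks.
 */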
9289 static void gaudi_state_dump_init(struct hl_device *hdev)
9290 {
9291         struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9292         int i;
9293
9294         for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9295                 hash_add(sds->so_id_to_str_tb,
9296                         &gaudi_so_id_to_str[i].node,
9297                         gaudi_so_id_to_str[i].id);
9298
9299         for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9300                 hash_add(sds->monitor_id_to_str_tb,
9301                         &gaudi_monitor_id_to_str[i].node,
9302                         gaudi_monitor_id_to_str[i].id);
9303
9304         sds->props = gaudi_state_dump_specs_props;
9305
9306         sds->sync_namager_names = gaudi_sync_manager_names;
9307
9308         sds->funcs = gaudi_state_dump_funcs;
9309 }
9310
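/* ASIC callbacks registered with the habanalabs common code */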
9311 static const struct hl_asic_funcs gaudi_funcs = {
9312         .early_init = gaudi_early_init,
9313         .early_fini = gaudi_early_fini,
9314         .late_init = gaudi_late_init,
9315         .late_fini = gaudi_late_fini,
9316         .sw_init = gaudi_sw_init,
9317         .sw_fini = gaudi_sw_fini,
9318         .hw_init = gaudi_hw_init,
9319         .hw_fini = gaudi_hw_fini,
9320         .halt_engines = gaudi_halt_engines,
9321         .suspend = gaudi_suspend,
9322         .resume = gaudi_resume,
9323         .mmap = gaudi_mmap,
9324         .ring_doorbell = gaudi_ring_doorbell,
9325         .pqe_write = gaudi_pqe_write,
9326         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9327         .asic_dma_free_coherent = gaudi_dma_free_coherent,
9328         .scrub_device_mem = gaudi_scrub_device_mem,
9329         .get_int_queue_base = gaudi_get_int_queue_base,
9330         .test_queues = gaudi_test_queues,
9331         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9332         .asic_dma_pool_free = gaudi_dma_pool_free,
9333         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9334         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9335         .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9336         .cs_parser = gaudi_cs_parser,
9337         .asic_dma_map_sg = gaudi_dma_map_sg,
9338         .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9339         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9340         .update_eq_ci = gaudi_update_eq_ci,
9341         .context_switch = gaudi_context_switch,
9342         .restore_phase_topology = gaudi_restore_phase_topology,
9343         .debugfs_read32 = gaudi_debugfs_read32,
9344         .debugfs_write32 = gaudi_debugfs_write32,
9345         .debugfs_read64 = gaudi_debugfs_read64,
9346         .debugfs_write64 = gaudi_debugfs_write64,
9347         .debugfs_read_dma = gaudi_debugfs_read_dma,
9348         .add_device_attr = gaudi_add_device_attr,
9349         .handle_eqe = gaudi_handle_eqe,
9350         .set_pll_profile = gaudi_set_pll_profile,
9351         .get_events_stat = gaudi_get_events_stat,
9352         .read_pte = gaudi_read_pte,
9353         .write_pte = gaudi_write_pte,
9354         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9355         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9356         .send_heartbeat = gaudi_send_heartbeat,
9357         .set_clock_gating = gaudi_set_clock_gating,
9358         .disable_clock_gating = gaudi_disable_clock_gating,
9359         .debug_coresight = gaudi_debug_coresight,
9360         .is_device_idle = gaudi_is_device_idle,
9361         .soft_reset_late_init = gaudi_soft_reset_late_init,
9362         .hw_queues_lock = gaudi_hw_queues_lock,
9363         .hw_queues_unlock = gaudi_hw_queues_unlock,
9364         .get_pci_id = gaudi_get_pci_id,
9365         .get_eeprom_data = gaudi_get_eeprom_data,
9366         .send_cpu_message = gaudi_send_cpu_message,
9367         .pci_bars_map = gaudi_pci_bars_map,
9368         .init_iatu = gaudi_init_iatu,
9369         .rreg = hl_rreg,
9370         .wreg = hl_wreg,
9371         .halt_coresight = gaudi_halt_coresight,
9372         .ctx_init = gaudi_ctx_init,
9373         .ctx_fini = gaudi_ctx_fini,
9374         .get_clk_rate = gaudi_get_clk_rate,
9375         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9376         .load_firmware_to_device = gaudi_load_firmware_to_device,
9377         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9378         .get_signal_cb_size = gaudi_get_signal_cb_size,
9379         .get_wait_cb_size = gaudi_get_wait_cb_size,
9380         .gen_signal_cb = gaudi_gen_signal_cb,
9381         .gen_wait_cb = gaudi_gen_wait_cb,
9382         .reset_sob = gaudi_reset_sob,
9383         .reset_sob_group = gaudi_reset_sob_group,
9384         .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9385         .get_device_time = gaudi_get_device_time,
9386         .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9387         .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9388         .scramble_addr = hl_mmu_scramble_addr,
9389         .descramble_addr = hl_mmu_descramble_addr,
9390         .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9391         .get_hw_block_id = gaudi_get_hw_block_id,
9392         .hw_block_mmap = gaudi_block_mmap,
9393         .enable_events_from_fw = gaudi_enable_events_from_fw,
9394         .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9395         .init_firmware_loader = gaudi_init_firmware_loader,
9396         .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9397         .state_dump_init = gaudi_state_dump_init,
9398         .get_sob_addr = gaudi_get_sob_addr,
9399         .set_pci_memory_regions = gaudi_set_pci_memory_regions
9400 };
9401
9402 /**
9403  * gaudi_set_asic_funcs - set GAUDI function pointers
9404  *
9405  * @hdev: pointer to hl_device structure
9406  *
9407  */
9408 void gaudi_set_asic_funcs(struct hl_device *hdev)
9409 {
9410         hdev->asic_funcs = &gaudi_funcs;
9411 }