habanalabs: Modify the cs_cnt of a CB to be atomic
[linux-2.6-microblaze.git] / drivers/misc/habanalabs/gaudi/gaudi.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2020 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse CB but WREG should be allowed
43  *                      because of TDMA (tensor DMA). Hence, WREG is never
44  *                      secured.
45  *
46  * When the driver needs to use DMA, it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61
62 #define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000         /* 1s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
81
82 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
83
84 #define GAUDI_MAX_STRING_LEN            20
85
86 #define GAUDI_CB_POOL_CB_CNT            512
87 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
88
89 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
90
91 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
92
93 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
94
95 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
96
97 #define GAUDI_ARB_WDT_TIMEOUT           0x1000000
98
99 #define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
100                 BIT(GAUDI_ENGINE_ID_MME_0) |\
101                 BIT(GAUDI_ENGINE_ID_MME_2) |\
102                 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
103
104 #define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
105
106 #define GAUDI_PLL_MAX 10
107
108 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
109                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
110                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
111                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
112                 "gaudi cpu eq"
113 };
114
115 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
116         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
117         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
118         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
119         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
120         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
121         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
122         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
123         [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
124 };
125
126 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
127         [0] = GAUDI_QUEUE_ID_DMA_0_0,
128         [1] = GAUDI_QUEUE_ID_DMA_0_1,
129         [2] = GAUDI_QUEUE_ID_DMA_0_2,
130         [3] = GAUDI_QUEUE_ID_DMA_0_3,
131         [4] = GAUDI_QUEUE_ID_DMA_1_0,
132         [5] = GAUDI_QUEUE_ID_DMA_1_1,
133         [6] = GAUDI_QUEUE_ID_DMA_1_2,
134         [7] = GAUDI_QUEUE_ID_DMA_1_3,
135 };
136
137 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
138         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
139         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
140         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
141         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
142         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
143         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
144         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
145         [PACKET_FENCE]          = sizeof(struct packet_fence),
146         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
147         [PACKET_NOP]            = sizeof(struct packet_nop),
148         [PACKET_STOP]           = sizeof(struct packet_stop),
149         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
150         [PACKET_WAIT]           = sizeof(struct packet_wait),
151         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
152 };
153
154 static const u32 gaudi_pll_base_addresses[GAUDI_PLL_MAX] = {
155         [CPU_PLL] = mmPSOC_CPU_PLL_NR,
156         [PCI_PLL] = mmPSOC_PCI_PLL_NR,
157         [SRAM_PLL] = mmSRAM_W_PLL_NR,
158         [HBM_PLL] = mmPSOC_HBM_PLL_NR,
159         [NIC_PLL] = mmNIC0_PLL_NR,
160         [DMA_PLL] = mmDMA_W_PLL_NR,
161         [MESH_PLL] = mmMESH_W_PLL_NR,
162         [MME_PLL] = mmPSOC_MME_PLL_NR,
163         [TPC_PLL] = mmPSOC_TPC_PLL_NR,
164         [IF_PLL] = mmIF_W_PLL_NR
165 };
166
167 static inline bool validate_packet_id(enum packet_id id)
168 {
169         switch (id) {
170         case PACKET_WREG_32:
171         case PACKET_WREG_BULK:
172         case PACKET_MSG_LONG:
173         case PACKET_MSG_SHORT:
174         case PACKET_CP_DMA:
175         case PACKET_REPEAT:
176         case PACKET_MSG_PROT:
177         case PACKET_FENCE:
178         case PACKET_LIN_DMA:
179         case PACKET_NOP:
180         case PACKET_STOP:
181         case PACKET_ARB_POINT:
182         case PACKET_WAIT:
183         case PACKET_LOAD_AND_EXE:
184                 return true;
185         default:
186                 return false;
187         }
188 }
189
190 static const char * const
191 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
192         "tpc_address_exceed_slm",
193         "tpc_div_by_0",
194         "tpc_spu_mac_overflow",
195         "tpc_spu_addsub_overflow",
196         "tpc_spu_abs_overflow",
197         "tpc_spu_fp_dst_nan_inf",
198         "tpc_spu_fp_dst_denorm",
199         "tpc_vpu_mac_overflow",
200         "tpc_vpu_addsub_overflow",
201         "tpc_vpu_abs_overflow",
202         "tpc_vpu_fp_dst_nan_inf",
203         "tpc_vpu_fp_dst_denorm",
204         "tpc_assertions",
205         "tpc_illegal_instruction",
206         "tpc_pc_wrap_around",
207         "tpc_qm_sw_err",
208         "tpc_hbw_rresp_err",
209         "tpc_hbw_bresp_err",
210         "tpc_lbw_rresp_err",
211         "tpc_lbw_bresp_err"
212 };
213
214 static const char * const
215 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
216         "PQ AXI HBW error",
217         "CQ AXI HBW error",
218         "CP AXI HBW error",
219         "CP error due to undefined OPCODE",
220         "CP encountered STOP OPCODE",
221         "CP AXI LBW error",
222         "CP WRREG32 or WRBULK returned error",
223         "N/A",
224         "FENCE 0 inc over max value and clipped",
225         "FENCE 1 inc over max value and clipped",
226         "FENCE 2 inc over max value and clipped",
227         "FENCE 3 inc over max value and clipped",
228         "FENCE 0 dec under min value and clipped",
229         "FENCE 1 dec under min value and clipped",
230         "FENCE 2 dec under min value and clipped",
231         "FENCE 3 dec under min value and clipped"
232 };
233
234 static const char * const
235 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
236         "Choice push while full error",
237         "Choice Q watchdog error",
238         "MSG AXI LBW returned with error"
239 };
240
241 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
242         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
243         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
244         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
245         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
246         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
247         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
248         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
249         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
250         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
251         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
252         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
253         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
254         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
255         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
256         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
257         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
258         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
259         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
304         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
305         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
306         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
307         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
308         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
309         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
310         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
311         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
312         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
313         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
314         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
315         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
316         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
317         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
318         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
319         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
320         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
321         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
322         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
323         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
324         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
325         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
326         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
327         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
328         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
329         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
330         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
331         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
332         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
333         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
334         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
335         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
336         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
337         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
338         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
339         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
340         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
341         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
342         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
343         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
344         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
345         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
346         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
347         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
348         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
349         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
350         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
351         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
352         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
353         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
354         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
355 };
356
357 struct ecc_info_extract_params {
358         u64 block_address;
359         u32 num_memories;
360         bool derr;
361         bool disable_clock_gating;
362 };
363
364 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
365                                                                 u64 phys_addr);
366 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
367                                         struct hl_cs_job *job);
368 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
369                                         u32 size, u64 val);
370 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
371                                 u32 tpc_id);
372 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
373 static int gaudi_cpucp_info_get(struct hl_device *hdev);
374 static void gaudi_disable_clock_gating(struct hl_device *hdev);
375 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
376 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
377                                 u32 size);
378 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
379                                 struct hl_gen_wait_properties *prop);
380
381 static inline enum hl_collective_mode
382 get_collective_mode(struct hl_device *hdev, u32 queue_id)
383 {
384         if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
385                 return HL_COLLECTIVE_MASTER;
386
387         if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
388                         queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
389                 return HL_COLLECTIVE_SLAVE;
390
391         if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
392                         queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
393                 return HL_COLLECTIVE_SLAVE;
394
395         if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
396                         queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
397                 return HL_COLLECTIVE_SLAVE;
398
399         return HL_COLLECTIVE_NOT_SUPPORTED;
400 }
401
402 static int gaudi_get_fixed_properties(struct hl_device *hdev)
403 {
404         struct asic_fixed_properties *prop = &hdev->asic_prop;
405         u32 num_sync_stream_queues = 0;
406         int i;
407
408         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
409         prop->hw_queues_props = kcalloc(prop->max_queues,
410                         sizeof(struct hw_queue_properties),
411                         GFP_KERNEL);
412
413         if (!prop->hw_queues_props)
414                 return -ENOMEM;
415
416         for (i = 0 ; i < prop->max_queues ; i++) {
417                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
418                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
419                         prop->hw_queues_props[i].driver_only = 0;
420                         prop->hw_queues_props[i].supports_sync_stream = 1;
421                         prop->hw_queues_props[i].cb_alloc_flags =
422                                 CB_ALLOC_KERNEL;
423                         num_sync_stream_queues++;
424                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
425                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
426                         prop->hw_queues_props[i].driver_only = 1;
427                         prop->hw_queues_props[i].supports_sync_stream = 0;
428                         prop->hw_queues_props[i].cb_alloc_flags =
429                                 CB_ALLOC_KERNEL;
430                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
431                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
432                         prop->hw_queues_props[i].driver_only = 0;
433                         prop->hw_queues_props[i].supports_sync_stream = 0;
434                         prop->hw_queues_props[i].cb_alloc_flags =
435                                 CB_ALLOC_USER;
436
437                 }
438                 prop->hw_queues_props[i].collective_mode =
439                                                 get_collective_mode(hdev, i);
440         }
441
442         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
443         prop->collective_first_sob = 0;
444         prop->collective_first_mon = 0;
445
446         /* 2 SOBs per internal queue stream are reserved for collective */
447         prop->sync_stream_first_sob =
448                         ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
449                         * QMAN_STREAMS * HL_RSVD_SOBS;
450
451         /* 1 monitor per internal queue stream is reserved for collective
452          * 2 monitors per external queue stream are reserved for collective
453          */
454         prop->sync_stream_first_mon =
455                         (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
456                         (NUMBER_OF_EXT_HW_QUEUES * 2);
457
458         prop->dram_base_address = DRAM_PHYS_BASE;
459         prop->dram_size = GAUDI_HBM_SIZE_32GB;
460         prop->dram_end_address = prop->dram_base_address +
461                                         prop->dram_size;
462         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
463
464         prop->sram_base_address = SRAM_BASE_ADDR;
465         prop->sram_size = SRAM_SIZE;
466         prop->sram_end_address = prop->sram_base_address +
467                                         prop->sram_size;
468         prop->sram_user_base_address = prop->sram_base_address +
469                                         SRAM_USER_BASE_OFFSET;
470
471         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
472         if (hdev->pldm)
473                 prop->mmu_pgt_size = 0x800000; /* 8MB */
474         else
475                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
476         prop->mmu_pte_size = HL_PTE_SIZE;
477         prop->mmu_hop_table_size = HOP_TABLE_SIZE;
478         prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
479         prop->dram_page_size = PAGE_SIZE_2MB;
480         prop->dram_supports_virtual_memory = false;
481
482         prop->pmmu.hop0_shift = HOP0_SHIFT;
483         prop->pmmu.hop1_shift = HOP1_SHIFT;
484         prop->pmmu.hop2_shift = HOP2_SHIFT;
485         prop->pmmu.hop3_shift = HOP3_SHIFT;
486         prop->pmmu.hop4_shift = HOP4_SHIFT;
487         prop->pmmu.hop0_mask = HOP0_MASK;
488         prop->pmmu.hop1_mask = HOP1_MASK;
489         prop->pmmu.hop2_mask = HOP2_MASK;
490         prop->pmmu.hop3_mask = HOP3_MASK;
491         prop->pmmu.hop4_mask = HOP4_MASK;
492         prop->pmmu.start_addr = VA_HOST_SPACE_START;
493         prop->pmmu.end_addr =
494                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
495         prop->pmmu.page_size = PAGE_SIZE_4KB;
496         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
497
498         /* PMMU and HPMMU are the same except for the page size */
499         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
500         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
501
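        /* Note: the host VA range is split below - the lower half is used by
         * the PMMU (and PMMU-huge) and the upper half by the DMMU.
         */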
502         /* shifts and masks are the same in PMMU and DMMU */
503         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
504         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
505         prop->dmmu.end_addr = VA_HOST_SPACE_END;
506         prop->dmmu.page_size = PAGE_SIZE_2MB;
507
508         prop->cfg_size = CFG_SIZE;
509         prop->max_asid = MAX_ASID;
510         prop->num_of_events = GAUDI_EVENT_SIZE;
511         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
512
513         prop->max_power_default = MAX_POWER_DEFAULT_PCI;
514
515         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
516         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
517
518         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
519         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
520
521         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
522                                         CARD_NAME_MAX_LEN);
523
524         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
525
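        /* SOBs and monitors available to the user start right after the ones
         * reserved for the driver's sync streams.
         */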
526         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
527                         prop->sync_stream_first_sob +
528                         (num_sync_stream_queues * HL_RSVD_SOBS);
529         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
530                         prop->sync_stream_first_mon +
531                         (num_sync_stream_queues * HL_RSVD_MONS);
532
533         /* disable fw security for now, set it in a later stage */
534         prop->fw_security_disabled = true;
535         prop->fw_security_status_valid = false;
536         prop->hard_reset_done_by_fw = false;
537
538         return 0;
539 }
540
541 static int gaudi_pci_bars_map(struct hl_device *hdev)
542 {
543         static const char * const name[] = {"SRAM", "CFG", "HBM"};
544         bool is_wc[3] = {false, false, true};
545         int rc;
546
547         rc = hl_pci_bars_map(hdev, name, is_wc);
548         if (rc)
549                 return rc;
550
551         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
552                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
553
554         return 0;
555 }
556
557 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
558 {
559         struct gaudi_device *gaudi = hdev->asic_specific;
560         struct hl_inbound_pci_region pci_region;
561         u64 old_addr = addr;
562         int rc;
563
564         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
565                 return old_addr;
566
567         /* Inbound Region 2 - Bar 4 - Point to HBM */
568         pci_region.mode = PCI_BAR_MATCH_MODE;
569         pci_region.bar = HBM_BAR_ID;
570         pci_region.addr = addr;
571         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
572         if (rc)
573                 return U64_MAX;
574
575         if (gaudi) {
576                 old_addr = gaudi->hbm_bar_cur_addr;
577                 gaudi->hbm_bar_cur_addr = addr;
578         }
579
580         return old_addr;
581 }
582
583 static int gaudi_init_iatu(struct hl_device *hdev)
584 {
585         struct hl_inbound_pci_region inbound_region;
586         struct hl_outbound_pci_region outbound_region;
587         int rc;
588
589         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
590         inbound_region.mode = PCI_BAR_MATCH_MODE;
591         inbound_region.bar = SRAM_BAR_ID;
592         inbound_region.addr = SRAM_BASE_ADDR;
593         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
594         if (rc)
595                 goto done;
596
597         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
598         inbound_region.mode = PCI_BAR_MATCH_MODE;
599         inbound_region.bar = CFG_BAR_ID;
600         inbound_region.addr = SPI_FLASH_BASE_ADDR;
601         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
602         if (rc)
603                 goto done;
604
605         /* Inbound Region 2 - Bar 4 - Point to HBM */
606         inbound_region.mode = PCI_BAR_MATCH_MODE;
607         inbound_region.bar = HBM_BAR_ID;
608         inbound_region.addr = DRAM_PHYS_BASE;
609         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
610         if (rc)
611                 goto done;
612
613         hdev->asic_funcs->set_dma_mask_from_fw(hdev);
614
615         /* Outbound Region 0 - Point to Host */
616         outbound_region.addr = HOST_PHYS_BASE;
617         outbound_region.size = HOST_PHYS_SIZE;
618         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
619
620 done:
621         return rc;
622 }
623
624 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
625 {
626         return RREG32(mmHW_STATE);
627 }
628
629 static int gaudi_early_init(struct hl_device *hdev)
630 {
631         struct asic_fixed_properties *prop = &hdev->asic_prop;
632         struct pci_dev *pdev = hdev->pdev;
633         int rc;
634
635         rc = gaudi_get_fixed_properties(hdev);
636         if (rc) {
637                 dev_err(hdev->dev, "Failed to get fixed properties\n");
638                 return rc;
639         }
640
641         /* Check BAR sizes */
642         if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
643                 dev_err(hdev->dev,
644                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
645                         SRAM_BAR_ID,
646                         (unsigned long long) pci_resource_len(pdev,
647                                                         SRAM_BAR_ID),
648                         SRAM_BAR_SIZE);
649                 rc = -ENODEV;
650                 goto free_queue_props;
651         }
652
653         if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
654                 dev_err(hdev->dev,
655                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
656                         CFG_BAR_ID,
657                         (unsigned long long) pci_resource_len(pdev,
658                                                                 CFG_BAR_ID),
659                         CFG_BAR_SIZE);
660                 rc = -ENODEV;
661                 goto free_queue_props;
662         }
663
664         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
665
666         rc = hl_pci_init(hdev);
667         if (rc)
668                 goto free_queue_props;
669
670         if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
671                 dev_info(hdev->dev,
672                         "H/W state is dirty, must reset before initializing\n");
673                 hdev->asic_funcs->hw_fini(hdev, true);
674         }
675
676         /* Before continuing with the initialization, we need to read the preboot
677          * version to determine whether we run with a security-enabled firmware
678          */
679         rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
680                         mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
681                         GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
682         if (rc) {
683                 if (hdev->reset_on_preboot_fail)
684                         hdev->asic_funcs->hw_fini(hdev, true);
685                 goto pci_fini;
686         }
687
688         return 0;
689
690 pci_fini:
691         hl_pci_fini(hdev);
692 free_queue_props:
693         kfree(hdev->asic_prop.hw_queues_props);
694         return rc;
695 }
696
697 static int gaudi_early_fini(struct hl_device *hdev)
698 {
699         kfree(hdev->asic_prop.hw_queues_props);
700         hl_pci_fini(hdev);
701
702         return 0;
703 }
704
705 /**
706  * gaudi_fetch_pll_frequency - Fetch PLL frequency values
707  *
708  * @hdev: pointer to hl_device structure
709  * @pll_index: index of the pll to fetch frequency from
710  * @pll_freq_arr: pointer to store the pll frequency in MHz in each of the
711  *            available outputs. If a certain output is not available, 0 will be set
712  *
713  */
714 static int gaudi_fetch_pll_frequency(struct hl_device *hdev,
715                                 enum gaudi_pll_index pll_index,
716                                 u16 *pll_freq_arr)
717 {
718         u32 nr = 0, nf = 0, od = 0, pll_clk = 0, div_fctr, div_sel,
719                         pll_base_addr = gaudi_pll_base_addresses[pll_index];
720         u16 freq = 0;
721         int i, rc;
722
723         if (hdev->asic_prop.fw_security_status_valid &&
724                         (hdev->asic_prop.fw_app_security_map &
725                                         CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
726                 rc = hl_fw_cpucp_pll_info_get(hdev, pll_index, pll_freq_arr);
727
728                 if (rc)
729                         return rc;
730         } else if (hdev->asic_prop.fw_security_disabled) {
731                 /* Backward compatibility */
732                 nr = RREG32(pll_base_addr + PLL_NR_OFFSET);
733                 nf = RREG32(pll_base_addr + PLL_NF_OFFSET);
734                 od = RREG32(pll_base_addr + PLL_OD_OFFSET);
735
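                /* Each PLL output is either the (optionally divided)
                 * reference clock or the (optionally divided) PLL clock,
                 * where pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1))
                 */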
736                 for (i = 0; i < HL_PLL_NUM_OUTPUTS; i++) {
737                         div_fctr = RREG32(pll_base_addr +
738                                         PLL_DIV_FACTOR_0_OFFSET + i * 4);
739                         div_sel = RREG32(pll_base_addr +
740                                         PLL_DIV_SEL_0_OFFSET + i * 4);
741
742                         if (div_sel == DIV_SEL_REF_CLK ||
743                                 div_sel == DIV_SEL_DIVIDED_REF) {
744                                 if (div_sel == DIV_SEL_REF_CLK)
745                                         freq = PLL_REF_CLK;
746                                 else
747                                         freq = PLL_REF_CLK / (div_fctr + 1);
748                         } else if (div_sel == DIV_SEL_PLL_CLK ||
749                                         div_sel == DIV_SEL_DIVIDED_PLL) {
750                                 pll_clk = PLL_REF_CLK * (nf + 1) /
751                                                 ((nr + 1) * (od + 1));
752                                 if (div_sel == DIV_SEL_PLL_CLK)
753                                         freq = pll_clk;
754                                 else
755                                         freq = pll_clk / (div_fctr + 1);
756                         } else {
757                                 dev_warn(hdev->dev,
758                                         "Received invalid div select value: %d",
759                                         div_sel);
760                         }
761
762                         pll_freq_arr[i] = freq;
763                 }
764         } else {
765                 dev_err(hdev->dev, "Failed to fetch PLL frequency values\n");
766                 return -EIO;
767         }
768
769         return 0;
770 }
771
772 /**
773  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
774  *
775  * @hdev: pointer to hl_device structure
776  *
777  */
778 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
779 {
780         struct asic_fixed_properties *prop = &hdev->asic_prop;
781         u16 pll_freq[HL_PLL_NUM_OUTPUTS];
782         int rc;
783
784         rc = gaudi_fetch_pll_frequency(hdev, CPU_PLL, pll_freq);
785         if (rc)
786                 return rc;
787
788         prop->psoc_timestamp_frequency = pll_freq[2];
789         prop->psoc_pci_pll_nr = 0;
790         prop->psoc_pci_pll_nf = 0;
791         prop->psoc_pci_pll_od = 0;
792         prop->psoc_pci_pll_div_factor = 0;
793
794         return 0;
795 }
796
797 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
798                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
799 {
800         struct asic_fixed_properties *prop = &hdev->asic_prop;
801         struct packet_lin_dma *init_tpc_mem_pkt;
802         struct hl_cs_job *job;
803         struct hl_cb *cb;
804         u64 dst_addr;
805         u32 cb_size, ctl;
806         u8 tpc_id;
807         int rc;
808
809         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
810         if (!cb)
811                 return -EFAULT;
812
813         init_tpc_mem_pkt = cb->kernel_address;
814         cb_size = sizeof(*init_tpc_mem_pkt);
815         memset(init_tpc_mem_pkt, 0, cb_size);
816
817         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
818
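        /* Build a single LIN_DMA packet that copies the TPC kernel binary
         * from host memory to the SRAM user area, with the linear-DMA and
         * barrier (RB/MB) control bits set.
         */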
819         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
820         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
821         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
822         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
823
824         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
825
826         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
827         dst_addr = (prop->sram_user_base_address &
828                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
829                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
830         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
831
832         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
833         if (!job) {
834                 dev_err(hdev->dev, "Failed to allocate a new job\n");
835                 rc = -ENOMEM;
836                 goto release_cb;
837         }
838
839         job->id = 0;
840         job->user_cb = cb;
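        /* cs_cnt is the CB's (now atomic) CS reference count; take a
         * reference so the CB isn't released while the job is in flight and
         * drop it below once the job is done.
         */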
841         atomic_inc(&job->user_cb->cs_cnt);
842         job->user_cb_size = cb_size;
843         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
844         job->patched_cb = job->user_cb;
845         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
846
847         hl_debugfs_add_job(hdev, job);
848
849         rc = gaudi_send_job_on_qman0(hdev, job);
850
851         if (rc)
852                 goto free_job;
853
854         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
855                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
856                 if (rc)
857                         break;
858         }
859
860 free_job:
861         hl_userptr_delete_list(hdev, &job->userptr_list);
862         hl_debugfs_remove_job(hdev, job);
863         kfree(job);
864         atomic_dec(&cb->cs_cnt);
865
866 release_cb:
867         hl_cb_put(cb);
868         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
869
870         return rc;
871 }
872
873 /*
874  * gaudi_init_tpc_mem() - Initialize TPC memories.
875  * @hdev: Pointer to hl_device structure.
876  *
877  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
878  *
879  * Return: 0 for success, negative value for error.
880  */
881 static int gaudi_init_tpc_mem(struct hl_device *hdev)
882 {
883         const struct firmware *fw;
884         size_t fw_size;
885         void *cpu_addr;
886         dma_addr_t dma_handle;
887         int rc;
888
889         rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
890         if (rc) {
891                 dev_err(hdev->dev, "Firmware file %s is not found!\n",
892                                 GAUDI_TPC_FW_FILE);
893                 goto out;
894         }
895
896         fw_size = fw->size;
897         cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
898                         &dma_handle, GFP_KERNEL | __GFP_ZERO);
899         if (!cpu_addr) {
900                 dev_err(hdev->dev,
901                         "Failed to allocate %zu of dma memory for TPC kernel\n",
902                         fw_size);
903                 rc = -ENOMEM;
904                 goto out;
905         }
906
907         memcpy(cpu_addr, fw->data, fw_size);
908
909         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
910
911         hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
912                         dma_handle);
913
914 out:
915         release_firmware(fw);
916         return rc;
917 }
918
919 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
920 {
921         struct gaudi_device *gaudi = hdev->asic_specific;
922         struct gaudi_collective_properties *prop = &gaudi->collective_props;
923         struct hl_hw_queue *q;
924         u32 i, sob_id, sob_group_id, queue_id;
925
926         /* Iterate through SOB groups and assign a SOB for each slave queue */
927         sob_group_id =
928                 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
929         sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
930
931         queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
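        /* Queues are laid out 4 streams per engine, so the same stream on
         * the next NIC engine is 4 queue IDs away.
         */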
932         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
933                 q = &hdev->kernel_queues[queue_id + (4 * i)];
934                 q->sync_stream_prop.collective_sob_id = sob_id + i;
935         }
936
937         /* Both DMA5 and TPC7 use the same resources since only a single
938          * engine needs to participate in the reduction process
939          */
940         queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
941         q = &hdev->kernel_queues[queue_id];
942         q->sync_stream_prop.collective_sob_id =
943                         sob_id + NIC_NUMBER_OF_ENGINES;
944
945         queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
946         q = &hdev->kernel_queues[queue_id];
947         q->sync_stream_prop.collective_sob_id =
948                         sob_id + NIC_NUMBER_OF_ENGINES;
949 }
950
951 static void gaudi_sob_group_hw_reset(struct kref *ref)
952 {
953         struct gaudi_hw_sob_group *hw_sob_group =
954                 container_of(ref, struct gaudi_hw_sob_group, kref);
955         struct hl_device *hdev = hw_sob_group->hdev;
956         int i;
957
958         for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
959                 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
960                                 (hw_sob_group->base_sob_id + i) * 4, 0);
961
962         kref_init(&hw_sob_group->kref);
963 }
964
965 static void gaudi_sob_group_reset_error(struct kref *ref)
966 {
967         struct gaudi_hw_sob_group *hw_sob_group =
968                 container_of(ref, struct gaudi_hw_sob_group, kref);
969         struct hl_device *hdev = hw_sob_group->hdev;
970
971         dev_crit(hdev->dev,
972                 "SOB release shouldn't be called here, base_sob_id: %d\n",
973                 hw_sob_group->base_sob_id);
974 }
975
976 static int gaudi_collective_init(struct hl_device *hdev)
977 {
978         u32 i, master_monitor_sobs, sob_id, reserved_sobs_per_group;
979         struct gaudi_collective_properties *prop;
980         struct gaudi_device *gaudi;
981
982         gaudi = hdev->asic_specific;
983         prop = &gaudi->collective_props;
984         sob_id = hdev->asic_prop.collective_first_sob;
985
986         /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
987         reserved_sobs_per_group =
988                 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
989
990         /* Init SOB groups */
991         for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
992                 prop->hw_sob_group[i].hdev = hdev;
993                 prop->hw_sob_group[i].base_sob_id = sob_id;
994                 sob_id += reserved_sobs_per_group;
995                 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
996         }
997
998         for (i = 0 ; i < QMAN_STREAMS; i++) {
999                 prop->next_sob_group_val[i] = 1;
1000                 prop->curr_sob_group_idx[i] = 0;
1001                 gaudi_collective_map_sobs(hdev, i);
1002         }
1003
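        /* Build the two master monitor SOB masks: the first covers the SOBs
         * of NICs 0-7, the second covers NICs 8-9 plus the reduction engine
         * (DMA5/TPC7), whose bit is set below.
         */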
1004         prop->mstr_sob_mask[0] = 0;
1005         master_monitor_sobs = HL_MAX_SOBS_PER_MONITOR;
1006         for (i = 0 ; i < master_monitor_sobs ; i++)
1007                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1008                         prop->mstr_sob_mask[0] |= BIT(i);
1009
1010         prop->mstr_sob_mask[1] = 0;
1011         master_monitor_sobs =
1012                 NIC_NUMBER_OF_ENGINES - HL_MAX_SOBS_PER_MONITOR;
1013         for (i = 0 ; i < master_monitor_sobs; i++) {
1014                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1015                         prop->mstr_sob_mask[1] |= BIT(i);
1016         }
1017
1018         /* Set collective engine bit */
1019         prop->mstr_sob_mask[1] |= BIT(i);
1020
1021         return 0;
1022 }
1023
1024 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1025 {
1026         struct gaudi_device *gaudi = hdev->asic_specific;
1027         struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1028
1029         kref_put(&cprop->hw_sob_group[sob_group].kref,
1030                                         gaudi_sob_group_hw_reset);
1031 }
1032
1033 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1034                 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1035 {
1036         u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1037         struct gaudi_collective_properties *cprop;
1038         struct hl_gen_wait_properties wait_prop;
1039         struct hl_sync_stream_properties *prop;
1040         struct gaudi_device *gaudi;
1041
1042         gaudi = hdev->asic_specific;
1043         cprop = &gaudi->collective_props;
1044         queue_id = job->hw_queue_id;
1045         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1046
1047         master_sob_base =
1048                 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1049         master_monitor = prop->collective_mstr_mon_id[0];
1050
1051         dev_dbg(hdev->dev,
1052                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1053                 master_sob_base, cprop->mstr_sob_mask[0],
1054                 cprop->next_sob_group_val[stream],
1055                 master_monitor, queue_id);
1056
1057         wait_prop.data = (void *) job->patched_cb;
1058         wait_prop.sob_base = master_sob_base;
1059         wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1060         wait_prop.sob_val = cprop->next_sob_group_val[stream];
1061         wait_prop.mon_id = master_monitor;
1062         wait_prop.q_idx = queue_id;
1063         wait_prop.size = cb_size;
1064         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1065
1066         master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1067         master_monitor = prop->collective_mstr_mon_id[1];
1068
1069         dev_dbg(hdev->dev,
1070                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1071                 master_sob_base, cprop->mstr_sob_mask[1],
1072                 cprop->next_sob_group_val[stream],
1073                 master_monitor, queue_id);
1074
1075         wait_prop.sob_base = master_sob_base;
1076         wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1077         wait_prop.mon_id = master_monitor;
1078         wait_prop.size = cb_size;
1079         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1080 }
1081
1082 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1083                 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1084 {
1085         struct hl_gen_wait_properties wait_prop;
1086         struct hl_sync_stream_properties *prop;
1087         u32 queue_id, cb_size = 0;
1088
1089         queue_id = job->hw_queue_id;
1090         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1091
1092         /* Add to wait CBs using slave monitor */
1093         wait_prop.data = (void *) job->user_cb;
1094         wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1095         wait_prop.sob_mask = 0x1;
1096         wait_prop.sob_val = cs_cmpl->sob_val;
1097         wait_prop.mon_id = prop->collective_slave_mon_id;
1098         wait_prop.q_idx = queue_id;
1099         wait_prop.size = cb_size;
1100
1101         dev_dbg(hdev->dev,
1102                 "Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
1103                 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1104                 prop->collective_slave_mon_id, queue_id);
1105
1106         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1107
1108         dev_dbg(hdev->dev,
1109                 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1110                 prop->collective_sob_id, queue_id);
1111
1112         cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1113                         prop->collective_sob_id, cb_size);
1114 }
1115
1116 static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
1117 {
1118         struct hl_cs_compl *signal_cs_cmpl =
1119                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1120         struct hl_cs_compl *cs_cmpl =
1121                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1122         struct gaudi_collective_properties *cprop;
1123         u32 stream, queue_id, sob_group_offset;
1124         struct gaudi_device *gaudi;
1125         struct hl_device *hdev;
1126         struct hl_cs_job *job;
1127         struct hl_ctx *ctx;
1128
1129         ctx = cs->ctx;
1130         hdev = ctx->hdev;
1131         gaudi = hdev->asic_specific;
1132         cprop = &gaudi->collective_props;
1133
1134         /* copy the SOB id and value of the signal CS */
1135         cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1136         cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1137
1138         /* Calculate the stream from collective master queue (1st job) */
1139         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1140         stream = job->hw_queue_id % 4;
1141         sob_group_offset =
1142                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1143
1144         list_for_each_entry(job, &cs->job_list, cs_node) {
1145                 queue_id = job->hw_queue_id;
1146
1147                 if (hdev->kernel_queues[queue_id].collective_mode ==
1148                                 HL_COLLECTIVE_MASTER)
1149                         gaudi_collective_master_init_job(hdev, job, stream,
1150                                                 sob_group_offset);
1151                 else
1152                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1153         }
1154
1155         cs_cmpl->sob_group = sob_group_offset;
1156
1157         /* Handle sob group kref and wraparound */
1158         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1159         cprop->next_sob_group_val[stream]++;
1160
1161         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1162                 /*
1163                  * Decrement as we reached the max value.
1164                  * The release function won't be called here as we've
1165                  * just incremented the refcount.
1166                  */
1167                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1168                                 gaudi_sob_group_reset_error);
1169                 cprop->next_sob_group_val[stream] = 1;
1170                 /* only two SOBs are currently in use */
1171                 cprop->curr_sob_group_idx[stream] =
1172                         (cprop->curr_sob_group_idx[stream] + 1) &
1173                                                         (HL_RSVD_SOBS - 1);
1174
1175                 gaudi_collective_map_sobs(hdev, stream);
1176
1177                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1178                                 cprop->curr_sob_group_idx[stream], stream);
1179         }
1180
1181         /* Increment kref since all slave queues are now waiting on it */
1182         kref_get(&cs_cmpl->hw_sob->kref);
1183         /*
1184          * Must put the signal fence after the SOB refcnt increment so
1185          * the SOB refcnt won't reach 0 and reset the SOB before the
1186          * wait CS is submitted.
1187          */
1188         mb();
1189         hl_fence_put(cs->signal_fence);
1190         cs->signal_fence = NULL;
1191 }
1192
1193 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1194                 struct hl_ctx *ctx, struct hl_cs *cs,
1195                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
1196 {
1197         struct hw_queue_properties *hw_queue_prop;
1198         struct hl_cs_counters_atomic *cntr;
1199         struct hl_cs_job *job;
1200         struct hl_cb *cb;
1201         u32 cb_size;
1202         bool patched_cb;
1203
1204         cntr = &hdev->aggregated_cs_counters;
1205
1206         if (mode == HL_COLLECTIVE_MASTER) {
1207                 /* CB size of collective master queue contains
1208                  * 4 msg short packets for monitor 1 configuration
1209                  * 1 fence packet
1210                  * 4 msg short packets for monitor 2 configuration
1211                  * 1 fence packet
1212                  * 2 msg prot packets for completion and MSI-X
1213                  */
1214                 cb_size = sizeof(struct packet_msg_short) * 8 +
1215                                 sizeof(struct packet_fence) * 2 +
1216                                 sizeof(struct packet_msg_prot) * 2;
1217                 patched_cb = true;
1218         } else {
1219                 /* CB size of collective slave queues contains
1220                  * 4 msg short packets for monitor configuration
1221                  * 1 fence packet
1222                  * 1 additional msg short packet for sob signal
1223                  */
1224                 cb_size = sizeof(struct packet_msg_short) * 5 +
1225                                 sizeof(struct packet_fence);
1226                 patched_cb = false;
1227         }
1228
1229         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1230         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1231         if (!job) {
1232                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1233                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1234                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1235                 return -ENOMEM;
1236         }
1237
1238         /* Allocate an internal mapped CB for non-patched CBs */
1239         cb = hl_cb_kernel_create(hdev, cb_size,
1240                         hdev->mmu_enable && !patched_cb);
1241         if (!cb) {
1242                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1243                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1244                 kfree(job);
1245                 return -EFAULT;
1246         }
1247
1248         job->id = 0;
1249         job->cs = cs;
1250         job->user_cb = cb;
1251         atomic_inc(&job->user_cb->cs_cnt);
1252         job->user_cb_size = cb_size;
1253         job->hw_queue_id = queue_id;
1254
1255         /*
1256          * No need for parsing, the user CB is already the patched CB.
1257          * We call hl_cb_destroy() for two reasons - we don't need
1258          * the CB in the CB idr anymore and to decrement its refcount as
1259          * it was incremented inside hl_cb_kernel_create().
1260          */
1261         if (patched_cb)
1262                 job->patched_cb = job->user_cb;
1263         else
1264                 job->patched_cb = NULL;
1265
1266         job->job_cb_size = job->user_cb_size;
1267         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1268
1269         /* increment refcount since we get a completion for external queues */
1270         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1271                 cs_get(cs);
1272
1273         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1274
1275         list_add_tail(&job->cs_node, &cs->job_list);
1276
1277         hl_debugfs_add_job(hdev, job);
1278
1279         return 0;
1280 }
1281
1282 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1283                 struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
1284                 u32 collective_engine_id)
1285 {
1286         struct gaudi_device *gaudi = hdev->asic_specific;
1287         struct hw_queue_properties *hw_queue_prop;
1288         u32 queue_id, collective_queue, num_jobs;
1289         u32 stream, nic_queue, nic_idx = 0;
1290         bool skip;
1291         int i, rc;
1292
1293         /* Verify wait queue id is configured as master */
1294         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1295         if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1296                 dev_err(hdev->dev,
1297                         "Queue %d is not configured as collective master\n",
1298                         wait_queue_id);
1299                 return -EINVAL;
1300         }
1301
1302         /* Verify engine id is supported */
1303         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1304                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1305                 dev_err(hdev->dev,
1306                         "Collective wait does not support engine %u\n",
1307                         collective_engine_id);
1308                 return -EINVAL;
1309         }
1310
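        /* Each QMAN exposes 4 streams, so the stream index is the queue ID
         * modulo 4.
         */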
1311         stream = wait_queue_id % 4;
1312
1313         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1314                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1315         else
1316                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1317
1318         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1319         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1320
1321         /* The first job goes to the collective master queue; it will wait for
1322          * the collective slave queues to finish execution.
1323          * The synchronization is done using two monitors:
1324          * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1325          * reduction engine (DMA5/TPC7).
1326          *
1327          * The rest of the jobs go to the collective slave queues, which will
1328          * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1329          */
1330         for (i = 0 ; i < num_jobs ; i++) {
1331                 if (i == 0) {
1332                         queue_id = wait_queue_id;
1333                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1334                                 HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
1335                 } else {
1336                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1337                                 if (gaudi->hw_cap_initialized &
1338                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1339                                         skip = false;
1340                                 else
1341                                         skip = true;
1342
1343                                 queue_id = nic_queue;
1344                                 nic_queue += 4;
1345                                 nic_idx++;
1346
1347                                 if (skip)
1348                                         continue;
1349                         } else {
1350                                 queue_id = collective_queue;
1351                         }
1352
1353                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1354                                 HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
1355                 }
1356
1357                 if (rc)
1358                         return rc;
1359         }
1360
1361         return rc;
1362 }
1363
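/*
 * Late initialization, done after the device CPU is up: fetch the cpucp
 * info, disable the unused NIC QMANs on PCI cards, enable PCI access from
 * the device CPU, fetch the PSOC frequency, clear the MMU page tables
 * range, initialize the TPC memories and set up the collective resources.
 */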
1364 static int gaudi_late_init(struct hl_device *hdev)
1365 {
1366         struct gaudi_device *gaudi = hdev->asic_specific;
1367         int rc;
1368
1369         rc = gaudi->cpucp_info_get(hdev);
1370         if (rc) {
1371                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1372                 return rc;
1373         }
1374
1375         if ((hdev->card_type == cpucp_card_type_pci) &&
1376                         (hdev->nic_ports_mask & 0x3)) {
1377                 dev_info(hdev->dev,
1378                         "PCI card detected, only 8 ports are enabled\n");
1379                 hdev->nic_ports_mask &= ~0x3;
1380
1381                 /* Stop and disable unused NIC QMANs */
1382                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1383                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1384                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1385
1386                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1387                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1388                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1389
1390                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1391                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1392
1393                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1394         }
1395
1396         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1397         if (rc) {
1398                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1399                 return rc;
1400         }
1401
1402         WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
1403
1404         rc = gaudi_fetch_psoc_frequency(hdev);
1405         if (rc) {
1406                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1407                 goto disable_pci_access;
1408         }
1409
1410         rc = gaudi_mmu_clear_pgt_range(hdev);
1411         if (rc) {
1412                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1413                 goto disable_pci_access;
1414         }
1415
1416         rc = gaudi_init_tpc_mem(hdev);
1417         if (rc) {
1418                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1419                 goto disable_pci_access;
1420         }
1421
1422         rc = gaudi_collective_init(hdev);
1423         if (rc) {
1424                 dev_err(hdev->dev, "Failed to init collective\n");
1425                 goto disable_pci_access;
1426         }
1427
1428         return 0;
1429
1430 disable_pci_access:
1431         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1432
1433         return rc;
1434 }
1435
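/* Release the hwmon channel info array and its entries */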
1436 static void gaudi_late_fini(struct hl_device *hdev)
1437 {
1438         const struct hwmon_channel_info **channel_info_arr;
1439         int i = 0;
1440
1441         if (!hdev->hl_chip_info->info)
1442                 return;
1443
1444         channel_info_arr = hdev->hl_chip_info->info;
1445
1446         while (channel_info_arr[i]) {
1447                 kfree(channel_info_arr[i]->config);
1448                 kfree(channel_info_arr[i]);
1449                 i++;
1450         }
1451
1452         kfree(channel_info_arr);
1453
1454         hdev->hl_chip_info->info = NULL;
1455 }
1456
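/*
 * Allocate the host memory region that is accessible by the device CPU.
 * The allocation is retried up to GAUDI_ALLOC_CPU_MEM_RETRY_CNT times until
 * the MSBs (bits 49:39) of the start and end addresses match, because a
 * single H/W register holds the address extension bits for the whole range.
 */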
1457 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1458 {
1459         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1460         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1461         int i, j, rc = 0;
1462
1463         /*
1464          * The device CPU works with 40-bit addresses, and bit 39 must be set
1465          * to '1' when accessing the host.
1466          * Bits 49:39 of the full host address are saved for a later
1467          * configuration of the H/W, which extends the address to 50 bits.
1468          * Because a single H/W register holds the extension bits, these bits
1469          * must be identical across the entire allocated range.
1470          */
1471
1472         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1473                 virt_addr_arr[i] =
1474                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1475                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1476                                                 &dma_addr_arr[i],
1477                                                 GFP_KERNEL | __GFP_ZERO);
1478                 if (!virt_addr_arr[i]) {
1479                         rc = -ENOMEM;
1480                         goto free_dma_mem_arr;
1481                 }
1482
1483                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1484                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1485                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1486                         break;
1487         }
1488
1489         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1490                 dev_err(hdev->dev,
1491                         "MSB of CPU accessible DMA memory is not identical across the allocated range\n");
1492                 rc = -EFAULT;
1493                 goto free_dma_mem_arr;
1494         }
1495
1496         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1497         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1498         hdev->cpu_pci_msb_addr =
1499                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1500
1501         if (hdev->asic_prop.fw_security_disabled)
1502                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1503
1504 free_dma_mem_arr:
1505         for (j = 0 ; j < i ; j++)
1506                 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1507                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1508                                                 virt_addr_arr[j],
1509                                                 dma_addr_arr[j]);
1510
1511         return rc;
1512 }
1513
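/* Free the PQ buffers that were allocated for the internal queues */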
1514 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1515 {
1516         struct gaudi_device *gaudi = hdev->asic_specific;
1517         struct gaudi_internal_qman_info *q;
1518         u32 i;
1519
1520         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1521                 q = &gaudi->internal_qmans[i];
1522                 if (!q->pq_kernel_addr)
1523                         continue;
1524                 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1525                                                         q->pq_kernel_addr,
1526                                                         q->pq_dma_addr);
1527         }
1528 }
1529
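/*
 * Allocate a PQ buffer for every internal queue (HBM DMA, MME, TPC and NIC
 * QMANs). The PQ size depends on the engine type.
 */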
1530 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1531 {
1532         struct gaudi_device *gaudi = hdev->asic_specific;
1533         struct gaudi_internal_qman_info *q;
1534         int rc, i;
1535
1536         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1537                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1538                         continue;
1539
1540                 q = &gaudi->internal_qmans[i];
1541
1542                 switch (i) {
1543                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1544                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1545                         break;
1546                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1547                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1548                         break;
1549                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1550                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1551                         break;
1552                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1553                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1554                         break;
1555                 default:
1556                         dev_err(hdev->dev, "Bad internal queue index %d\n", i);
1557                         rc = -EINVAL;
1558                         goto free_internal_qmans_pq_mem;
1559                 }
1560
1561                 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1562                                                 hdev, q->pq_size,
1563                                                 &q->pq_dma_addr,
1564                                                 GFP_KERNEL | __GFP_ZERO);
1565                 if (!q->pq_kernel_addr) {
1566                         rc = -ENOMEM;
1567                         goto free_internal_qmans_pq_mem;
1568                 }
1569         }
1570
1571         return 0;
1572
1573 free_internal_qmans_pq_mem:
1574         gaudi_free_internal_qmans_pq_mem(hdev);
1575         return rc;
1576 }
1577
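/*
 * S/W initialization: allocate the ASIC-specific structure, build the event
 * map, create the DMA pool and the CPU accessible memory region and pool,
 * and allocate the PQ buffers of the internal queues.
 */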
1578 static int gaudi_sw_init(struct hl_device *hdev)
1579 {
1580         struct gaudi_device *gaudi;
1581         u32 i, event_id = 0;
1582         int rc;
1583
1584         /* Allocate device structure */
1585         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1586         if (!gaudi)
1587                 return -ENOMEM;
1588
1589         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1590                 if (gaudi_irq_map_table[i].valid) {
1591                         if (event_id == GAUDI_EVENT_SIZE) {
1592                                 dev_err(hdev->dev,
1593                                         "Event array exceeds the limit of %u events\n",
1594                                         GAUDI_EVENT_SIZE);
1595                                 rc = -EINVAL;
1596                                 goto free_gaudi_device;
1597                         }
1598
1599                         gaudi->events[event_id++] =
1600                                         gaudi_irq_map_table[i].fc_id;
1601                 }
1602         }
1603
1604         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1605
1606         gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1607
1608         hdev->asic_specific = gaudi;
1609
1610         /* Create DMA pool for small allocations */
1611         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1612                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1613         if (!hdev->dma_pool) {
1614                 dev_err(hdev->dev, "failed to create DMA pool\n");
1615                 rc = -ENOMEM;
1616                 goto free_gaudi_device;
1617         }
1618
1619         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1620         if (rc)
1621                 goto free_dma_pool;
1622
1623         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1624         if (!hdev->cpu_accessible_dma_pool) {
1625                 dev_err(hdev->dev,
1626                         "Failed to create CPU accessible DMA pool\n");
1627                 rc = -ENOMEM;
1628                 goto free_cpu_dma_mem;
1629         }
1630
1631         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1632                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1633                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1634         if (rc) {
1635                 dev_err(hdev->dev,
1636                         "Failed to add memory to CPU accessible DMA pool\n");
1637                 rc = -EFAULT;
1638                 goto free_cpu_accessible_dma_pool;
1639         }
1640
1641         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1642         if (rc)
1643                 goto free_cpu_accessible_dma_pool;
1644
1645         spin_lock_init(&gaudi->hw_queues_lock);
1646         mutex_init(&gaudi->clk_gate_mutex);
1647
1648         hdev->supports_sync_stream = true;
1649         hdev->supports_coresight = true;
1650
1651         return 0;
1652
1653 free_cpu_accessible_dma_pool:
1654         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1655 free_cpu_dma_mem:
1656         if (hdev->asic_prop.fw_security_disabled)
1657                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1658                                         hdev->cpu_pci_msb_addr);
1659         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1660                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1661                         hdev->cpu_accessible_dma_mem,
1662                         hdev->cpu_accessible_dma_address);
1663 free_dma_pool:
1664         dma_pool_destroy(hdev->dma_pool);
1665 free_gaudi_device:
1666         kfree(gaudi);
1667         return rc;
1668 }
1669
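/* S/W teardown: release everything that was allocated in gaudi_sw_init() */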
1670 static int gaudi_sw_fini(struct hl_device *hdev)
1671 {
1672         struct gaudi_device *gaudi = hdev->asic_specific;
1673
1674         gaudi_free_internal_qmans_pq_mem(hdev);
1675
1676         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1677
1678         if (hdev->asic_prop.fw_security_disabled)
1679                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1680                                         hdev->cpu_pci_msb_addr);
1681
1682         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1683                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1684                         hdev->cpu_accessible_dma_mem,
1685                         hdev->cpu_accessible_dma_address);
1686
1687         dma_pool_destroy(hdev->dma_pool);
1688
1689         mutex_destroy(&gaudi->clk_gate_mutex);
1690
1691         kfree(gaudi);
1692
1693         return 0;
1694 }
1695
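/*
 * Single MSI interrupt handler: as there is only one vector, check all
 * completion queues and the event queue on every interrupt.
 */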
1696 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1697 {
1698         struct hl_device *hdev = arg;
1699         int i;
1700
1701         if (hdev->disabled)
1702                 return IRQ_HANDLED;
1703
1704         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1705                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1706
1707         hl_irq_handler_eq(irq, &hdev->event_queue);
1708
1709         return IRQ_HANDLED;
1710 }
1711
1712 /*
1713  * For backward compatibility, new MSI interrupts should be set after the
1714  * existing CPU and NIC interrupts.
1715  */
1716 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1717                                 bool cpu_eq)
1718 {
1719         int msi_vec;
1720
1721         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1722                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1723                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1724
1725         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1726                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1727
1728         return pci_irq_vector(hdev->pdev, msi_vec);
1729 }
1730
1731 static int gaudi_enable_msi_single(struct hl_device *hdev)
1732 {
1733         int rc, irq;
1734
1735         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1736
1737         irq = gaudi_pci_irq_vector(hdev, 0, false);
1738         rc = request_irq(irq, gaudi_irq_handler_single, 0,
1739                         "gaudi single msi", hdev);
1740         if (rc)
1741                 dev_err(hdev->dev,
1742                         "Failed to request single MSI IRQ\n");
1743
1744         return rc;
1745 }
1746
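/*
 * Multi MSI mode: request a dedicated IRQ for every completion queue plus
 * one for the CPU event queue.
 */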
1747 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1748 {
1749         int cq_cnt = hdev->asic_prop.completion_queues_count;
1750         int rc, i, irq_cnt_init, irq;
1751
1752         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1753                 irq = gaudi_pci_irq_vector(hdev, i, false);
1754                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1755                                 &hdev->completion_queue[i]);
1756                 if (rc) {
1757                         dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1758                         goto free_irqs;
1759                 }
1760         }
1761
1762         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1763         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1764                                 &hdev->event_queue);
1765         if (rc) {
1766                 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1767                 goto free_irqs;
1768         }
1769
1770         return 0;
1771
1772 free_irqs:
1773         for (i = 0 ; i < irq_cnt_init ; i++)
1774                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1775                                 &hdev->completion_queue[i]);
1776         return rc;
1777 }
1778
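/*
 * Enable MSI interrupts: try to allocate the full set of vectors and fall
 * back to single MSI mode if fewer than NUMBER_OF_INTERRUPTS are available.
 */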
1779 static int gaudi_enable_msi(struct hl_device *hdev)
1780 {
1781         struct gaudi_device *gaudi = hdev->asic_specific;
1782         int rc;
1783
1784         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1785                 return 0;
1786
1787         rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1788                                         PCI_IRQ_MSI);
1789         if (rc < 0) {
1790                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1791                 return rc;
1792         }
1793
1794         if (rc < NUMBER_OF_INTERRUPTS) {
1795                 gaudi->multi_msi_mode = false;
1796                 rc = gaudi_enable_msi_single(hdev);
1797         } else {
1798                 gaudi->multi_msi_mode = true;
1799                 rc = gaudi_enable_msi_multi(hdev);
1800         }
1801
1802         if (rc)
1803                 goto free_pci_irq_vectors;
1804
1805         gaudi->hw_cap_initialized |= HW_CAP_MSI;
1806
1807         return 0;
1808
1809 free_pci_irq_vectors:
1810         pci_free_irq_vectors(hdev->pdev);
1811         return rc;
1812 }
1813
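/* Wait until all in-flight interrupt handlers have completed */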
1814 static void gaudi_sync_irqs(struct hl_device *hdev)
1815 {
1816         struct gaudi_device *gaudi = hdev->asic_specific;
1817         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1818
1819         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1820                 return;
1821
1822         /* Wait for all pending IRQ handlers to finish */
1823         if (gaudi->multi_msi_mode) {
1824                 for (i = 0 ; i < cq_cnt ; i++)
1825                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1826
1827                 synchronize_irq(gaudi_pci_irq_vector(hdev,
1828                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
1829                                                 true));
1830         } else {
1831                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1832         }
1833 }
1834
1835 static void gaudi_disable_msi(struct hl_device *hdev)
1836 {
1837         struct gaudi_device *gaudi = hdev->asic_specific;
1838         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1839
1840         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1841                 return;
1842
1843         gaudi_sync_irqs(hdev);
1844
1845         if (gaudi->multi_msi_mode) {
1846                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1847                                                 true);
1848                 free_irq(irq, &hdev->event_queue);
1849
1850                 for (i = 0 ; i < cq_cnt ; i++) {
1851                         irq = gaudi_pci_irq_vector(hdev, i, false);
1852                         free_irq(irq, &hdev->completion_queue[i]);
1853                 }
1854         } else {
1855                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1856         }
1857
1858         pci_free_irq_vectors(hdev->pdev);
1859
1860         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1861 }
1862
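/*
 * Enable the SRAM scrambler in all routers and DMA interfaces. Skipped when
 * the driver is not allowed to configure it, when the F/W reports it has
 * already enabled it, when it is already enabled or when SRAM scrambling is
 * disabled.
 */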
1863 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1864 {
1865         struct gaudi_device *gaudi = hdev->asic_specific;
1866
1867         if (!hdev->asic_prop.fw_security_disabled)
1868                 return;
1869
1870         if (hdev->asic_prop.fw_security_status_valid &&
1871                         (hdev->asic_prop.fw_app_security_map &
1872                                         CPU_BOOT_DEV_STS0_SRAM_SCR_EN))
1873                 return;
1874
1875         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1876                 return;
1877
1878         if (!hdev->sram_scrambler_enable)
1879                 return;
1880
1881         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1882                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1883         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1884                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1885         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1886                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1887         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1888                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1889         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1890                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1891         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1892                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1893         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1894                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1895         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1896                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1897
1898         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1899                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1900         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1901                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1902         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1903                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1904         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1905                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1906         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1907                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1908         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1909                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1910         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1911                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1912         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1913                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1914
1915         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1916                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1917         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1918                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1919         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1920                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1921         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1922                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1923         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1924                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1925         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1926                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1927         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1928                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1929         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1930                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1931
1932         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1933 }
1934
1935 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1936 {
1937         struct gaudi_device *gaudi = hdev->asic_specific;
1938
1939         if (!hdev->asic_prop.fw_security_disabled)
1940                 return;
1941
1942         if (hdev->asic_prop.fw_security_status_valid &&
1943                         (hdev->asic_prop.fw_boot_cpu_security_map &
1944                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN))
1945                 return;
1946
1947         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1948                 return;
1949
1950         if (!hdev->dram_scrambler_enable)
1951                 return;
1952
1953         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1954                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1955         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1956                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1957         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1958                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1959         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1960                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1961         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1962                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1963         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1964                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1965         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1966                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1967         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1968                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1969
1970         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1971                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1972         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1973                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1974         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1975                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1976         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1977                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1978         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1979                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1980         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1981                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1982         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1983                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1984         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1985                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1986
1987         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1988                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1989         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1990                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1991         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1992                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1993         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1994                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1995         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1996                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1997         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1998                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1999         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2000                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2001         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2002                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2003
2004         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2005 }
2006
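/*
 * Configure the end-to-end (E2E) credits of the routers and DMA interfaces.
 * Skipped when the driver is not allowed to configure them or when the F/W
 * reports it has already configured them.
 */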
2007 static void gaudi_init_e2e(struct hl_device *hdev)
2008 {
2009         if (!hdev->asic_prop.fw_security_disabled)
2010                 return;
2011
2012         if (hdev->asic_prop.fw_security_status_valid &&
2013                         (hdev->asic_prop.fw_boot_cpu_security_map &
2014                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN))
2015                 return;
2016
2017         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2018         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2019         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2020         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2021
2022         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2023         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2024         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2025         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2026
2027         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2028         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2029         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2030         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2031
2032         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2033         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2034         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2035         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2036
2037         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2038         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2039         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2040         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2041
2042         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2043         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2044         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2045         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2046
2047         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2048         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2049         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2050         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2051
2052         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2053         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2054         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2055         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2056
2057         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2058         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2059         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2060         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2061
2062         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2063         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2064         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2065         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2066
2067         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2068         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2069         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2070         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2071
2072         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2073         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2074         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2075         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2076
2077         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2078         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2079         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2080         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2081
2082         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2083         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2084         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2085         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2086
2087         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2088         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2089         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2090         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2091
2092         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2093         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2094         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2095         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2096
2097         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2098         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2099         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2100         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2101
2102         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2103         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2104         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2105         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2106
2107         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2108         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2109         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2110         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2111
2112         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2113         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2114         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2115         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2116
2117         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2118         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2119         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2120         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2121
2122         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2123         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2124         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2125         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2126
2127         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2128         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2129         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2130         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2131
2132         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2133         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2134         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2135         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2136
2137         if (!hdev->dram_scrambler_enable) {
2138                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2139                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2140                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2141                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2142
2143                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2144                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2145                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2146                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2147
2148                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2149                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2150                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2151                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2152
2153                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2154                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2155                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2156                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2157
2158                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2159                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2160                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2161                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2162
2163                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2164                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2165                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2166                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2167
2168                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2169                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2170                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2171                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2172
2173                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2174                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2175                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2176                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2177
2178                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2179                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2180                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2181                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2182
2183                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2184                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2185                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2186                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2187
2188                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2189                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2190                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2191                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2192
2193                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2194                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2195                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2196                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2197
2198                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2199                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2200                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2201                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2202
2203                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2204                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2205                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2206                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2207
2208                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2209                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2210                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2211                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2212
2213                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2214                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2215                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2216                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2217
2218                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2219                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2220                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2221                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2222
2223                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2224                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2225                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2226                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2227
2228                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2229                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2230                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2231                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2232
2233                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2234                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2235                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2236                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2237
2238                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2239                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2240                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2241                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2242
2243                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2244                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2245                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2246                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2247
2248                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2249                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2250                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2251                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2252
2253                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2254                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2255                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2256                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2257         }
2258
2259         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2260                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2261         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2262                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2263
2264         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2265                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2266         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2267                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2268
2269         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2270                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2271         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2272                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2273
2274         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2275                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2276         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2277                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2278
2279         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2280                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2281         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2282                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2283
2284         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2285                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2286         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2287                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2288
2289         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2290                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2291         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2292                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2293
2294         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2295                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2296         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2297                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2298
2299         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2300                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2301         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2302                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2303
2304         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2305                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2306         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2307                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2308
2309         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2310                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2311         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2312                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2313
2314         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2315                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2316         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2317                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2318
2319         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2320                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2321         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2322                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2323
2324         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2325                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2326         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2327                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2328
2329         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2330                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2331         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2332                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2333
2334         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2335                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2336         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2337                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2338
2339         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2340                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2341         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2342                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2343
2344         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2345                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2346         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2347                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2348
2349         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2350                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2351         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2352                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2353
2354         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2355                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2356         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2357                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2358
2359         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2360                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2361         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2362                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2363
2364         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2365                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2366         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2367                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2368
2369         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2370                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2371         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2372                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2373
2374         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2375                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2376         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2377                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2378 }
2379
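/*
 * Configure the HBM read/write credits of the DMA interfaces. Skipped when
 * the driver is not allowed to configure them or when the F/W reports it
 * has already configured them.
 */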
2380 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2381 {
2382         uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2383
2384         if (!hdev->asic_prop.fw_security_disabled)
2385                 return;
2386
2387         if (hdev->asic_prop.fw_security_status_valid &&
2388                         (hdev->asic_prop.fw_boot_cpu_security_map &
2389                                         CPU_BOOT_DEV_STS0_HBM_CRED_EN))
2390                 return;
2391
2392         hbm0_wr = 0x33333333;
2393         hbm0_rd = 0x77777777;
2394         hbm1_wr = 0x55555555;
2395         hbm1_rd = 0xDDDDDDDD;
2396
2397         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2398         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2399         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2400         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2401
2402         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2403         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2404         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2405         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2406
2407         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2408         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2409         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2410         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2411
2412         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2413         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2414         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2415         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2416
2417         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2418                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2419                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2420         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2421                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2422                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2423         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2424                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2425                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2426         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2427                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2428                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2429
2430         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2431                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2432                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2433         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2434                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2435                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2436         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2437                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2438                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2439         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2440                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2441                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2442 }
2443
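/*
 * Program the golden register values: E2E credits, HBM credits, TPC
 * interrupt masks and i-cache fetch configuration, the first SRAM bytes
 * (cleared for Tensor DMA) and the MME rollup counters. Clock gating is
 * disabled beforehand.
 */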
2444 static void gaudi_init_golden_registers(struct hl_device *hdev)
2445 {
2446         u32 tpc_offset;
2447         int tpc_id, i;
2448
2449         gaudi_init_e2e(hdev);
2450         gaudi_init_hbm_cred(hdev);
2451
2452         hdev->asic_funcs->disable_clock_gating(hdev);
2453
2454         for (tpc_id = 0, tpc_offset = 0;
2455                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2456                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2457                 /* Mask all arithmetic interrupts from TPC */
2458                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2459                 /* Set 16 cache lines */
2460                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2461                                 ICACHE_FETCH_LINE_NUM, 2);
2462         }
2463
2464         /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2465         for (i = 0 ; i < 128 ; i += 8)
2466                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2467
2468         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2469         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2470         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2471         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2472 }
2473
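/*
 * Configure a single stream of a PCI DMA QMAN: PQ base and size, LDMA
 * offsets and the monitor/SOB message base addresses. The RAZWI error
 * reporting, arbitration watchdog and protection bits are configured only
 * once per QMAN (for stream 0).
 */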
2474 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2475                                         int qman_id, dma_addr_t qman_pq_addr)
2476 {
2477         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2478         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2479         u32 q_off, dma_qm_offset;
2480         u32 dma_qm_err_cfg;
2481
2482         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2483
2484         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2485                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2486         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2487                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2488         so_base_en_lo = lower_32_bits(CFG_BASE +
2489                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2490         so_base_en_hi = upper_32_bits(CFG_BASE +
2491                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2492         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2493                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2494         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2495                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2496         so_base_ws_lo = lower_32_bits(CFG_BASE +
2497                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2498         so_base_ws_hi = upper_32_bits(CFG_BASE +
2499                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2500
2501         q_off = dma_qm_offset + qman_id * 4;
2502
2503         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2504         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2505
2506         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2507         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2508         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2509
2510         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2511         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2512                                                         QMAN_LDMA_SRC_OFFSET);
2513         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2514                                                         QMAN_LDMA_DST_OFFSET);
2515
2516         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2517         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2518         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2519         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2520         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2521         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2522         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2523         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2524
2525         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2526
2527         /* The following configuration is needed only once per QMAN */
2528         if (qman_id == 0) {
2529                 /* Configure RAZWI IRQ */
2530                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2531                 if (hdev->stop_on_err) {
2532                         dma_qm_err_cfg |=
2533                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2534                 }
2535
2536                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2537                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2538                         lower_32_bits(CFG_BASE +
2539                                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2540                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2541                         upper_32_bits(CFG_BASE +
2542                                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2543                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2544                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2545                                                                         dma_id);
2546
2547                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2548                                 QM_ARB_ERR_MSG_EN_MASK);
2549
2550                 /* Increase ARB WDT to support streams architecture */
2551                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2552                                 GAUDI_ARB_WDT_TIMEOUT);
2553
2554                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2555                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2556
2557                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2558         }
2559 }
2560
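/*
 * Configure a DMA core engine: read limits, error reporting, protection and
 * MMU-bypass secure properties, and finally enable it.
 */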
2561 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2562 {
2563         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2564         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2565
2566         /* Set to maximum possible according to physical size */
2567         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2568         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2569
2570         /* WA for H/W bug H3-2116 */
2571         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2572
2573         /* The STOP_ON bit means the operation gets no completion upon RAZWI */
2574         if (hdev->stop_on_err)
2575                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2576
2577         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2578         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2579                 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2580         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2581                 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2582         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2583                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2584         WREG32(mmDMA0_CORE_PROT + dma_offset,
2585                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2586         /* If the channel is secured, it should be in MMU bypass mode */
2587         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2588                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2589         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2590 }
2591
2592 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2593                                 u32 enable_mask)
2594 {
2595         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2596
2597         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2598 }
2599
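/*
 * Initialize the PCI DMA QMANs: bind each stream of every PCI DMA channel to
 * its kernel queue, CQ and MSI vector, then configure the DMA core and
 * enable the QMAN.
 */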
2600 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2601 {
2602         struct gaudi_device *gaudi = hdev->asic_specific;
2603         struct hl_hw_queue *q;
2604         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2605
2606         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2607                 return;
2608
2609         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2610                 dma_id = gaudi_dma_assignment[i];
2611                 /*
2612                  * For queues after the CPU Q, we need to add 1 to get the
2613                  * correct queue index. In addition, we need to skip over
2614                  * the CPU EQ and the NIC IRQs to get the correct MSI vector.
2615                  */
2616                 if (dma_id > 1) {
2617                         cpu_skip = 1;
2618                         nic_skip = NIC_NUMBER_OF_ENGINES;
2619                 } else {
2620                         cpu_skip = 0;
2621                         nic_skip = 0;
2622                 }
2623
2624                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2625                         q_idx = 4 * dma_id + j + cpu_skip;
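                        /* e.g. dma_id 5, stream 0: q_idx = 4 * 5 + 0 + 1 = 21 */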
2626                         q = &hdev->kernel_queues[q_idx];
2627                         q->cq_id = cq_id++;
2628                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2629                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2630                                                 q->bus_address);
2631                 }
2632
2633                 gaudi_init_dma_core(hdev, dma_id);
2634
2635                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2636         }
2637
2638         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2639 }
2640
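/*
 * Configure a single HBM DMA QMAN stream. Streams 0-3 (the upper CPs) are
 * bound to their PQs and use the CP-DMA offsets, while qman_id 4 initializes
 * the lower CP and carries the per-QMAN error, arbitration and protection
 * configuration.
 */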
2641 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2642                                         int qman_id, u64 qman_base_addr)
2643 {
2644         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2645         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2646         u32 q_off, dma_qm_offset;
2647         u32 dma_qm_err_cfg;
2648
2649         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2650
2651         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2652                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2653         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2654                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2655         so_base_en_lo = lower_32_bits(CFG_BASE +
2656                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2657         so_base_en_hi = upper_32_bits(CFG_BASE +
2658                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2659         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2660                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2661         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2662                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2663         so_base_ws_lo = lower_32_bits(CFG_BASE +
2664                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2665         so_base_ws_hi = upper_32_bits(CFG_BASE +
2666                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2667
2668         q_off = dma_qm_offset + qman_id * 4;
2669
2670         if (qman_id < 4) {
2671                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2672                                         lower_32_bits(qman_base_addr));
2673                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2674                                         upper_32_bits(qman_base_addr));
2675
2676                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2677                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2678                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2679
2680                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2681                                                         QMAN_CPDMA_SIZE_OFFSET);
2682                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2683                                                         QMAN_CPDMA_SRC_OFFSET);
2684                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2685                                                         QMAN_CPDMA_DST_OFFSET);
2686         } else {
2687                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2688                                                         QMAN_LDMA_SIZE_OFFSET);
2689                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2690                                                         QMAN_LDMA_SRC_OFFSET);
2691                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2692                                                         QMAN_LDMA_DST_OFFSET);
2693
2694                 /* Configure RAZWI IRQ */
2695                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2696                 if (hdev->stop_on_err) {
2697                         dma_qm_err_cfg |=
2698                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2699                 }
2700                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2701
2702                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2703                         lower_32_bits(CFG_BASE +
2704                                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2705                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2706                         upper_32_bits(CFG_BASE +
2707                                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2708                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2709                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2710                                                                         dma_id);
2711
2712                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2713                                 QM_ARB_ERR_MSG_EN_MASK);
2714
2715                 /* Increase ARB WDT to support streams architecture */
2716                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2717                                 GAUDI_ARB_WDT_TIMEOUT);
2718
2719                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2720                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2721                                 QMAN_INTERNAL_MAKE_TRUSTED);
2722         }
2723
2724         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2725         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2726         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2727         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2728
2729         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2730         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2731                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2732                                 mtr_base_ws_lo);
2733                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2734                                 mtr_base_ws_hi);
2735                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2736                                 so_base_ws_lo);
2737                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2738                                 so_base_ws_hi);
2739         }
2740 }
2741
2742 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2743 {
2744         struct gaudi_device *gaudi = hdev->asic_specific;
2745         struct gaudi_internal_qman_info *q;
2746         u64 qman_base_addr;
2747         int i, j, dma_id, internal_q_index;
2748
2749         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2750                 return;
2751
2752         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2753                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2754
2755                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2756                          /*
2757                           * Add the CPU queue in order to get the correct queue
2758                           * number, as all internal queues are placed after it.
2759                           */
2760                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
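                        /*
                         * e.g. dma_id 2, stream 0 yields index 2 * 4 + 0 + 1 = 9
                         * (assuming QMAN_STREAMS == 4)
                         */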
2761
2762                         q = &gaudi->internal_qmans[internal_q_index];
2763                         qman_base_addr = (u64) q->pq_dma_addr;
2764                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2765                                                 qman_base_addr);
2766                 }
2767
2768                 /* Initializing lower CP for HBM DMA QMAN */
2769                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2770
2771                 gaudi_init_dma_core(hdev, dma_id);
2772
2773                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2774         }
2775
2776         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2777 }
2778
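/*
 * Configure a single MME QMAN stream. As with the DMA QMANs, streams 0-3 set
 * up the PQ and the CP-DMA offsets, while qman_id 4 configures the lower CP,
 * including the RAZWI IRQ routing and the arbitration watchdog.
 */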
2779 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2780                                         int qman_id, u64 qman_base_addr)
2781 {
2782         u32 mtr_base_lo, mtr_base_hi;
2783         u32 so_base_lo, so_base_hi;
2784         u32 q_off, mme_id;
2785         u32 mme_qm_err_cfg;
2786
2787         mtr_base_lo = lower_32_bits(CFG_BASE +
2788                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2789         mtr_base_hi = upper_32_bits(CFG_BASE +
2790                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2791         so_base_lo = lower_32_bits(CFG_BASE +
2792                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2793         so_base_hi = upper_32_bits(CFG_BASE +
2794                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2795
2796         q_off = mme_offset + qman_id * 4;
2797
2798         if (qman_id < 4) {
2799                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2800                                         lower_32_bits(qman_base_addr));
2801                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2802                                         upper_32_bits(qman_base_addr));
2803
2804                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2805                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2806                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2807
2808                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2809                                                         QMAN_CPDMA_SIZE_OFFSET);
2810                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2811                                                         QMAN_CPDMA_SRC_OFFSET);
2812                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2813                                                         QMAN_CPDMA_DST_OFFSET);
2814         } else {
2815                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2816                                                         QMAN_LDMA_SIZE_OFFSET);
2817                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2818                                                         QMAN_LDMA_SRC_OFFSET);
2819                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2820                                                         QMAN_LDMA_DST_OFFSET);
2821
2822                 /* Configure RAZWI IRQ */
2823                 mme_id = mme_offset /
2824                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2825
2826                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2827                 if (hdev->stop_on_err) {
2828                         mme_qm_err_cfg |=
2829                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2830                 }
2831                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2832                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2833                         lower_32_bits(CFG_BASE +
2834                                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2835                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2836                         upper_32_bits(CFG_BASE +
2837                                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2838                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2839                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2840                                                                         mme_id);
2841
2842                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2843                                 QM_ARB_ERR_MSG_EN_MASK);
2844
2845                 /* Increase ARB WDT to support streams architecture */
2846                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2847                                 GAUDI_ARB_WDT_TIMEOUT);
2848
2849                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2850                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2851                                 QMAN_INTERNAL_MAKE_TRUSTED);
2852         }
2853
2854         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2855         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2856         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2857         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2858 }
2859
2860 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2861 {
2862         struct gaudi_device *gaudi = hdev->asic_specific;
2863         struct gaudi_internal_qman_info *q;
2864         u64 qman_base_addr;
2865         u32 mme_offset;
2866         int i, internal_q_index;
2867
2868         if (gaudi->hw_cap_initialized & HW_CAP_MME)
2869                 return;
2870
2871         /*
2872          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2873          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2874          */
2875
2876         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2877
2878         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2879                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2880                 q = &gaudi->internal_qmans[internal_q_index];
2881                 qman_base_addr = (u64) q->pq_dma_addr;
2882                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2883                                         qman_base_addr);
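                /* After the four MME_0_X streams, switch to the S_W MME (offset 0) */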
2884                 if (i == 3)
2885                         mme_offset = 0;
2886         }
2887
2888         /* Initializing lower CP for MME QMANs */
2889         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2890         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2891         gaudi_init_mme_qman(hdev, 0, 4, 0);
2892
2893         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2894         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2895
2896         gaudi->hw_cap_initialized |= HW_CAP_MME;
2897 }
2898
2899 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2900                                 int qman_id, u64 qman_base_addr)
2901 {
2902         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2903         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2904         u32 q_off, tpc_id;
2905         u32 tpc_qm_err_cfg;
2906
2907         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2908                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2909         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2910                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2911         so_base_en_lo = lower_32_bits(CFG_BASE +
2912                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2913         so_base_en_hi = upper_32_bits(CFG_BASE +
2914                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2915         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2916                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2917         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2918                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2919         so_base_ws_lo = lower_32_bits(CFG_BASE +
2920                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2921         so_base_ws_hi = upper_32_bits(CFG_BASE +
2922                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2923
2924         q_off = tpc_offset + qman_id * 4;
2925
2926         tpc_id = tpc_offset /
2927                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2928
2929         if (qman_id < 4) {
2930                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2931                                         lower_32_bits(qman_base_addr));
2932                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2933                                         upper_32_bits(qman_base_addr));
2934
2935                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2936                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2937                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2938
2939                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2940                                                         QMAN_CPDMA_SIZE_OFFSET);
2941                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2942                                                         QMAN_CPDMA_SRC_OFFSET);
2943                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2944                                                         QMAN_CPDMA_DST_OFFSET);
2945         } else {
2946                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2947                                                         QMAN_LDMA_SIZE_OFFSET);
2948                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2949                                                         QMAN_LDMA_SRC_OFFSET);
2950                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2951                                                         QMAN_LDMA_DST_OFFSET);
2952
2953                 /* Configure RAZWI IRQ */
2954                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2955                 if (hdev->stop_on_err) {
2956                         tpc_qm_err_cfg |=
2957                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2958                 }
2959
2960                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2961                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2962                         lower_32_bits(CFG_BASE +
2963                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2964                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2965                         upper_32_bits(CFG_BASE +
2966                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2967                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2968                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2969                                                                         tpc_id);
2970
2971                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2972                                 QM_ARB_ERR_MSG_EN_MASK);
2973
2974                 /* Increase ARB WDT to support streams architecture */
2975                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2976                                 GAUDI_ARB_WDT_TIMEOUT);
2977
2978                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2979                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2980                                 QMAN_INTERNAL_MAKE_TRUSTED);
2981         }
2982
2983         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2984         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2985         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2986         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2987
2988         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
2989         if (tpc_id == 6) {
2990                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2991                                 mtr_base_ws_lo);
2992                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2993                                 mtr_base_ws_hi);
2994                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2995                                 so_base_ws_lo);
2996                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2997                                 so_base_ws_hi);
2998         }
2999 }
3000
3001 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3002 {
3003         struct gaudi_device *gaudi = hdev->asic_specific;
3004         struct gaudi_internal_qman_info *q;
3005         u64 qman_base_addr;
3006         u32 so_base_hi, tpc_offset = 0;
3007         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3008                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3009         int i, tpc_id, internal_q_index;
3010
3011         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3012                 return;
3013
3014         so_base_hi = upper_32_bits(CFG_BASE +
3015                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3016
3017         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3018                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3019                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3020                                                 tpc_id * QMAN_STREAMS + i;
3021                         q = &gaudi->internal_qmans[internal_q_index];
3022                         qman_base_addr = (u64) q->pq_dma_addr;
3023                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3024                                                 qman_base_addr);
3025
3026                         if (i == 3) {
3027                                 /* Initializing lower CP for TPC QMAN */
3028                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3029
3030                                 /* Enable the QMAN and TPC channel */
3031                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3032                                                 QMAN_TPC_ENABLE);
3033                         }
3034                 }
3035
3036                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3037                                 so_base_hi);
3038
3039                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3040
3041                 gaudi->hw_cap_initialized |=
3042                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3043         }
3044 }
3045
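/*
 * Configure a single NIC QMAN stream. Unlike the DMA and TPC QMANs, every
 * NIC stream gets the west-south sync manager bases for sync stream
 * collective, and the error/arbitration setup is done once, on stream 0.
 */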
3046 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3047                                 int qman_id, u64 qman_base_addr, int nic_id)
3048 {
3049         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3050         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3051         u32 q_off;
3052         u32 nic_qm_err_cfg;
3053
3054         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3055                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3056         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3057                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3058         so_base_en_lo = lower_32_bits(CFG_BASE +
3059                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3060         so_base_en_hi = upper_32_bits(CFG_BASE +
3061                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3062         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3063                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3064         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3065                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3066         so_base_ws_lo = lower_32_bits(CFG_BASE +
3067                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3068         so_base_ws_hi = upper_32_bits(CFG_BASE +
3069                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3070
3071         q_off = nic_offset + qman_id * 4;
3072
3073         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3074         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3075
3076         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3077         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3078         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3079
3080         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3081                                                         QMAN_LDMA_SIZE_OFFSET);
3082         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3083                                                         QMAN_LDMA_SRC_OFFSET);
3084         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3085                                                         QMAN_LDMA_DST_OFFSET);
3086
3087         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3088         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3089         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3090         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3091
3092         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3093         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3094         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3095         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3096         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3097
3098         if (qman_id == 0) {
3099                 /* Configure RAZWI IRQ */
3100                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3101                 if (hdev->stop_on_err) {
3102                         nic_qm_err_cfg |=
3103                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3104                 }
3105
3106                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3107                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3108                         lower_32_bits(CFG_BASE +
3109                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
3110                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3111                         upper_32_bits(CFG_BASE +
3112                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
3113                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3114                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3115                                                                         nic_id);
3116
3117                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3118                                 QM_ARB_ERR_MSG_EN_MASK);
3119
3120                 /* Increase ARB WDT to support streams architecture */
3121                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3122                                 GAUDI_ARB_WDT_TIMEOUT);
3123
3124                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3125                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3126                                 QMAN_INTERNAL_MAKE_TRUSTED);
3127         }
3128 }
3129
3130 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3131 {
3132         struct gaudi_device *gaudi = hdev->asic_specific;
3133         struct gaudi_internal_qman_info *q;
3134         u64 qman_base_addr;
3135         u32 nic_offset = 0;
3136         u32 nic_delta_between_qmans =
3137                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3138         u32 nic_delta_between_nics =
3139                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3140         int i, nic_id, internal_q_index;
3141
3142         if (!hdev->nic_ports_mask)
3143                 return;
3144
3145         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3146                 return;
3147
3148         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3149
3150         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3151                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3152                         nic_offset += nic_delta_between_qmans;
3153                         if (nic_id & 1) {
3154                                 nic_offset -= (nic_delta_between_qmans * 2);
3155                                 nic_offset += nic_delta_between_nics;
3156                         }
3157                         continue;
3158                 }
3159
3160                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3161                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3162                                                 nic_id * QMAN_STREAMS + i;
3163                         q = &gaudi->internal_qmans[internal_q_index];
3164                         qman_base_addr = (u64) q->pq_dma_addr;
3165                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3166                                                 qman_base_addr, nic_id);
3167                 }
3168
3169                 /* Enable the QMAN */
3170                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3171
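                /*
                 * Each NIC macro hosts two QMANs: advance to the next QMAN,
                 * and after the second QMAN of a NIC rewind both QMAN steps
                 * and advance by one full NIC block instead.
                 */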
3172                 nic_offset += nic_delta_between_qmans;
3173                 if (nic_id & 1) {
3174                         nic_offset -= (nic_delta_between_qmans * 2);
3175                         nic_offset += nic_delta_between_nics;
3176                 }
3177
3178                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3179         }
3180 }
3181
3182 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3183 {
3184         struct gaudi_device *gaudi = hdev->asic_specific;
3185
3186         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3187                 return;
3188
3189         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3190         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3191         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3192 }
3193
3194 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3195 {
3196         struct gaudi_device *gaudi = hdev->asic_specific;
3197
3198         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3199                 return;
3200
3201         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3202         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3203         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3204         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3205         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3206 }
3207
3208 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3209 {
3210         struct gaudi_device *gaudi = hdev->asic_specific;
3211
3212         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3213                 return;
3214
3215         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3216         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3217 }
3218
3219 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3220 {
3221         struct gaudi_device *gaudi = hdev->asic_specific;
3222         u32 tpc_offset = 0;
3223         int tpc_id;
3224
3225         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3226                 return;
3227
3228         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3229                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3230                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3231         }
3232 }
3233
3234 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3235 {
3236         struct gaudi_device *gaudi = hdev->asic_specific;
3237         u32 nic_mask, nic_offset = 0;
3238         u32 nic_delta_between_qmans =
3239                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3240         u32 nic_delta_between_nics =
3241                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3242         int nic_id;
3243
3244         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3245                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3246
3247                 if (gaudi->hw_cap_initialized & nic_mask)
3248                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3249
3250                 nic_offset += nic_delta_between_qmans;
3251                 if (nic_id & 1) {
3252                         nic_offset -= (nic_delta_between_qmans * 2);
3253                         nic_offset += nic_delta_between_nics;
3254                 }
3255         }
3256 }
3257
3258 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3259 {
3260         struct gaudi_device *gaudi = hdev->asic_specific;
3261
3262         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3263                 return;
3264
3265         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3266         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3267         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3268         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3269 }
3270
3271 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3272 {
3273         struct gaudi_device *gaudi = hdev->asic_specific;
3274
3275         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3276                 return;
3277
3278         /* Stop CPs of HBM DMA QMANs */
3279
3280         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3281         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3282         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3283         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3284         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3285 }
3286
3287 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3288 {
3289         struct gaudi_device *gaudi = hdev->asic_specific;
3290
3291         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3292                 return;
3293
3294         /* Stop CPs of MME QMANs */
3295         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3296         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3297 }
3298
3299 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3300 {
3301         struct gaudi_device *gaudi = hdev->asic_specific;
3302
3303         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3304                 return;
3305
3306         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3307         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3308         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3309         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3310         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3311         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3312         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3313         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3314 }
3315
3316 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3317 {
3318         struct gaudi_device *gaudi = hdev->asic_specific;
3319
3320         /* Stop upper CPs of QMANs */
3321
3322         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3323                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3324                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3325                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3326                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3327
3328         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3329                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3330                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3331                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3332                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3333
3334         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3335                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3336                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3337                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3338                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3339
3340         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3341                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3342                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3343                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3344                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3345
3346         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3347                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3348                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3349                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3350                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3351
3352         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3353                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3354                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3355                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3356                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3357
3358         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3359                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3360                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3361                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3362                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3363
3364         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3365                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3366                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3367                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3368                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3369
3370         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3371                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3372                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3373                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3374                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3375
3376         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3377                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3378                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3379                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3380                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3381 }
3382
3383 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3384 {
3385         struct gaudi_device *gaudi = hdev->asic_specific;
3386
3387         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3388                 return;
3389
3390         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3391         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3392         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3393 }
3394
3395 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3396 {
3397         struct gaudi_device *gaudi = hdev->asic_specific;
3398
3399         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3400                 return;
3401
3402         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3403         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3404         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3405         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3406         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3407 }
3408
3409 static void gaudi_mme_stall(struct hl_device *hdev)
3410 {
3411         struct gaudi_device *gaudi = hdev->asic_specific;
3412
3413         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3414                 return;
3415
3416         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3417         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3418         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3419         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3420         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3421         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3422         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3423         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3424         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3425         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3426         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3427         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3428         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3429         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3430         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3431         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3432         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3433 }
3434
3435 static void gaudi_tpc_stall(struct hl_device *hdev)
3436 {
3437         struct gaudi_device *gaudi = hdev->asic_specific;
3438
3439         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3440                 return;
3441
3442         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3443         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3444         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3445         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3446         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3447         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3448         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3449         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3450 }
3451
3452 static void gaudi_set_clock_gating(struct hl_device *hdev)
3453 {
3454         struct gaudi_device *gaudi = hdev->asic_specific;
3455         u32 qman_offset;
3456         bool enable;
3457         int i;
3458
3459         /* If we are in a debug session, don't enable clock gating as it
3460          * may interfere
3461          */
3462         if (hdev->in_debug)
3463                 return;
3464
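        /*
         * The PCI DMA QMANs use the upper-CP gating value while the HBM DMA,
         * MME and TPC QMANs use the common-CP value. Gating is enabled per
         * engine according to hdev->clock_gating_mask.
         */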
3465         for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3466                 enable = !!(hdev->clock_gating_mask &
3467                                 (BIT_ULL(gaudi_dma_assignment[i])));
3468
3469                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3470                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3471                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3472                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3473                                 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3474         }
3475
3476         for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3477                 enable = !!(hdev->clock_gating_mask &
3478                                 (BIT_ULL(gaudi_dma_assignment[i])));
3479
3480                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3481                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3482                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3483                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3484                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3485         }
3486
3487         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3488         WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3489         WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3490
3491         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3492         WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3493         WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3494
3495         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3496                 enable = !!(hdev->clock_gating_mask &
3497                                 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3498
3499                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3500                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3501                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3502                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3503
3504                 qman_offset += TPC_QMAN_OFFSET;
3505         }
3506
3507         gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3508 }
3509
3510 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3511 {
3512         struct gaudi_device *gaudi = hdev->asic_specific;
3513         u32 qman_offset;
3514         int i;
3515
3516         if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
3517                 return;
3518
3519         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3520                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3521                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3522
3523                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3524         }
3525
3526         WREG32(mmMME0_QM_CGM_CFG, 0);
3527         WREG32(mmMME0_QM_CGM_CFG1, 0);
3528         WREG32(mmMME2_QM_CGM_CFG, 0);
3529         WREG32(mmMME2_QM_CGM_CFG1, 0);
3530
3531         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3532                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3533                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3534
3535                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3536         }
3537
3538         gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3539 }
3540
3541 static void gaudi_enable_timestamp(struct hl_device *hdev)
3542 {
3543         /* Disable the timestamp counter */
3544         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3545
3546         /* Zero the lower/upper parts of the 64-bit counter */
3547         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3548         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3549
3550         /* Enable the counter */
3551         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3552 }
3553
3554 static void gaudi_disable_timestamp(struct hl_device *hdev)
3555 {
3556         /* Disable the timestamp counter */
3557         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3558 }
3559
3560 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3561 {
3562         u32 wait_timeout_ms;
3563
3564         dev_info(hdev->dev,
3565                 "Halting compute engines and disabling interrupts\n");
3566
3567         if (hdev->pldm)
3568                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3569         else
3570                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3571
3572         gaudi_stop_nic_qmans(hdev);
3573         gaudi_stop_mme_qmans(hdev);
3574         gaudi_stop_tpc_qmans(hdev);
3575         gaudi_stop_hbm_dma_qmans(hdev);
3576         gaudi_stop_pci_dma_qmans(hdev);
3577
3578         hdev->asic_funcs->disable_clock_gating(hdev);
3579
3580         msleep(wait_timeout_ms);
3581
3582         gaudi_pci_dma_stall(hdev);
3583         gaudi_hbm_dma_stall(hdev);
3584         gaudi_tpc_stall(hdev);
3585         gaudi_mme_stall(hdev);
3586
3587         msleep(wait_timeout_ms);
3588
3589         gaudi_disable_nic_qmans(hdev);
3590         gaudi_disable_mme_qmans(hdev);
3591         gaudi_disable_tpc_qmans(hdev);
3592         gaudi_disable_hbm_dma_qmans(hdev);
3593         gaudi_disable_pci_dma_qmans(hdev);
3594
3595         gaudi_disable_timestamp(hdev);
3596
3597         gaudi_disable_msi(hdev);
3598 }
3599
3600 static int gaudi_mmu_init(struct hl_device *hdev)
3601 {
3602         struct asic_fixed_properties *prop = &hdev->asic_prop;
3603         struct gaudi_device *gaudi = hdev->asic_specific;
3604         u64 hop0_addr;
3605         int rc, i;
3606
3607         if (!hdev->mmu_enable)
3608                 return 0;
3609
3610         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3611                 return 0;
3612
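        /* Program the hop0 page-table address for every possible ASID */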
3613         for (i = 0 ; i < prop->max_asid ; i++) {
3614                 hop0_addr = prop->mmu_pgt_addr +
3615                                 (i * prop->mmu_hop_table_size);
3616
3617                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3618                 if (rc) {
3619                         dev_err(hdev->dev,
3620                                 "failed to set hop0 addr for asid %d\n", i);
3621                         goto err;
3622                 }
3623         }
3624
3625         /* init MMU cache manage page */
3626         WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3627         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3628
3629         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3630
3631         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3632         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3633
3634         WREG32(mmSTLB_HOP_CONFIGURATION,
3635                         hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3636
3637         /*
3638          * The H/W expects the first PI after init to be 1. After wraparound
3639          * we'll write 0.
3640          */
3641         gaudi->mmu_cache_inv_pi = 1;
3642
3643         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3644
3645         return 0;
3646
3647 err:
3648         return rc;
3649 }
3650
3651 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3652 {
3653         void __iomem *dst;
3654
3655         /* HBM scrambler must be initialized before pushing F/W to HBM */
3656         gaudi_init_scrambler_hbm(hdev);
3657
3658         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3659
3660         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3661 }
3662
3663 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3664 {
3665         void __iomem *dst;
3666
3667         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3668
3669         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3670 }
3671
3672 static int gaudi_read_device_fw_version(struct hl_device *hdev,
3673                                         enum hl_fw_component fwc)
3674 {
3675         const char *name;
3676         u32 ver_off;
3677         char *dest;
3678
3679         switch (fwc) {
3680         case FW_COMP_UBOOT:
3681                 ver_off = RREG32(mmUBOOT_VER_OFFSET);
3682                 dest = hdev->asic_prop.uboot_ver;
3683                 name = "U-Boot";
3684                 break;
3685         case FW_COMP_PREBOOT:
3686                 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
3687                 dest = hdev->asic_prop.preboot_ver;
3688                 name = "Preboot";
3689                 break;
3690         default:
3691                 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
3692                 return -EIO;
3693         }
3694
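        /* Strip the SRAM base so ver_off becomes an offset into the SRAM BAR */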
3695         ver_off &= ~((u32)SRAM_BASE_ADDR);
3696
3697         if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
3698                 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
3699                                                         VERSION_MAX_LEN);
3700         } else {
3701                 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
3702                                                                 name, ver_off);
3703                 strcpy(dest, "unavailable");
3704                 return -EIO;
3705         }
3706
3707         return 0;
3708 }
3709
3710 static int gaudi_init_cpu(struct hl_device *hdev)
3711 {
3712         struct gaudi_device *gaudi = hdev->asic_specific;
3713         int rc;
3714
3715         if (!hdev->cpu_enable)
3716                 return 0;
3717
3718         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3719                 return 0;
3720
3721         /*
3722          * The device CPU works with 40-bit addresses.
3723          * This register sets the extension to 50 bits.
3724          */
3725         if (hdev->asic_prop.fw_security_disabled)
3726                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3727
3728         rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
3729                         mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
3730                         mmCPU_CMD_STATUS_TO_HOST,
3731                         mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0,
3732                         !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
3733                         GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
3734
3735         if (rc)
3736                 return rc;
3737
3738         gaudi->hw_cap_initialized |= HW_CAP_CPU;
3739
3740         return 0;
3741 }
3742
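/*
 * Hand the PQ, event queue and CPU-accessible memory addresses to the device
 * CPU and wait for it to report that the PQ is ready for host use.
 */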
3743 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3744 {
3745         struct gaudi_device *gaudi = hdev->asic_specific;
3746         struct hl_eq *eq;
3747         u32 status;
3748         struct hl_hw_queue *cpu_pq =
3749                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3750         int err;
3751
3752         if (!hdev->cpu_queues_enable)
3753                 return 0;
3754
3755         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3756                 return 0;
3757
3758         eq = &hdev->event_queue;
3759
3760         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3761         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3762
3763         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3764         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3765
3766         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3767                         lower_32_bits(hdev->cpu_accessible_dma_address));
3768         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3769                         upper_32_bits(hdev->cpu_accessible_dma_address));
3770
3771         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3772         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3773         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3774
3775         /* Used for EQ CI */
3776         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3777
3778         WREG32(mmCPU_IF_PF_PQ_PI, 0);
3779
3780         if (gaudi->multi_msi_mode)
3781                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3782         else
3783                 WREG32(mmCPU_IF_QUEUE_INIT,
3784                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3785
3786         WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
3787
3788         err = hl_poll_timeout(
3789                 hdev,
3790                 mmCPU_IF_QUEUE_INIT,
3791                 status,
3792                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3793                 1000,
3794                 cpu_timeout);
3795
3796         if (err) {
3797                 dev_err(hdev->dev,
3798                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3799                 return -EIO;
3800         }
3801
3802         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3803         return 0;
3804 }
3805
3806 static void gaudi_pre_hw_init(struct hl_device *hdev)
3807 {
3808         /* Perform read from the device to make sure device is up */
3809         RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
3810
3811         if (hdev->asic_prop.fw_security_disabled) {
3812                 /* Set the access through PCI bars (Linux driver only) as
3813                  * secured
3814                  */
3815                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3816                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3817                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3818
3819                 /* Perform read to flush the waiting writes to ensure
3820                  * configuration was set in the device
3821                  */
3822                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3823         }
3824
3825         /*
3826          * Let's mark in the H/W that we have reached this point. We check
3827          * this value in the reset_before_init function to understand whether
3828          * we need to reset the chip before doing H/W init. This register is
3829          * cleared by the H/W upon H/W reset
3830          */
3831         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3832 }
3833
3834 static int gaudi_hw_init(struct hl_device *hdev)
3835 {
3836         int rc;
3837
3838         gaudi_pre_hw_init(hdev);
3839
3840         gaudi_init_pci_dma_qmans(hdev);
3841
3842         gaudi_init_hbm_dma_qmans(hdev);
3843
3844         rc = gaudi_init_cpu(hdev);
3845         if (rc) {
3846                 dev_err(hdev->dev, "failed to initialize CPU\n");
3847                 return rc;
3848         }
3849
3850         /* SRAM scrambler must be initialized after CPU is running from HBM */
3851         gaudi_init_scrambler_sram(hdev);
3852
3853         /* This is here just in case we are working without CPU */
3854         gaudi_init_scrambler_hbm(hdev);
3855
3856         gaudi_init_golden_registers(hdev);
3857
3858         rc = gaudi_mmu_init(hdev);
3859         if (rc)
3860                 return rc;
3861
3862         gaudi_init_security(hdev);
3863
3864         gaudi_init_mme_qmans(hdev);
3865
3866         gaudi_init_tpc_qmans(hdev);
3867
3868         gaudi_init_nic_qmans(hdev);
3869
3870         hdev->asic_funcs->set_clock_gating(hdev);
3871
3872         gaudi_enable_timestamp(hdev);
3873
3874         /* MSI must be enabled before CPU queues and NIC are initialized */
3875         rc = gaudi_enable_msi(hdev);
3876         if (rc)
3877                 goto disable_queues;
3878
3879         /* must be called after MSI was enabled */
3880         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3881         if (rc) {
3882                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3883                         rc);
3884                 goto disable_msi;
3885         }
3886
3887         /* Perform read from the device to flush all configuration */
3888         RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
3889
3890         return 0;
3891
3892 disable_msi:
3893         gaudi_disable_msi(hdev);
3894 disable_queues:
3895         gaudi_disable_mme_qmans(hdev);
3896         gaudi_disable_pci_dma_qmans(hdev);
3897
3898         return rc;
3899 }
3900
3901 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3902 {
3903         struct gaudi_device *gaudi = hdev->asic_specific;
3904         u32 status, reset_timeout_ms, cpu_timeout_ms;
3905
3906         if (!hard_reset) {
3907                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3908                 return;
3909         }
3910
3911         if (hdev->pldm) {
3912                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3913                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3914         } else {
3915                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3916                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3917         }
3918
3919         /* Set the device to handle FLR by H/W, as we are going to put the
3920          * device CPU into halt mode
3921          */
3922         if (hdev->asic_prop.fw_security_disabled &&
3923                                 !hdev->asic_prop.hard_reset_done_by_fw)
3924                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
3925                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3926
3927         /* The state of the device CPU is unknown at this point, so make
3928          * sure it is stopped by any means necessary
3929          */
3930         WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
3931
3932         WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
3933
3934         if (hdev->asic_prop.fw_security_disabled &&
3935                                 !hdev->asic_prop.hard_reset_done_by_fw) {
3936
3937                 /* Configure the reset registers. Must be done as early as
3938                  * possible in case we fail during H/W initialization
3939                  */
3940                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
3941                                                 (CFG_RST_H_DMA_MASK |
3942                                                 CFG_RST_H_MME_MASK |
3943                                                 CFG_RST_H_SM_MASK |
3944                                                 CFG_RST_H_TPC_7_MASK));
3945
3946                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
3947
3948                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
3949                                                 (CFG_RST_H_HBM_MASK |
3950                                                 CFG_RST_H_TPC_7_MASK |
3951                                                 CFG_RST_H_NIC_MASK |
3952                                                 CFG_RST_H_SM_MASK |
3953                                                 CFG_RST_H_DMA_MASK |
3954                                                 CFG_RST_H_MME_MASK |
3955                                                 CFG_RST_H_CPU_MASK |
3956                                                 CFG_RST_H_MMU_MASK));
3957
3958                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
3959                                                 (CFG_RST_L_IF_MASK |
3960                                                 CFG_RST_L_PSOC_MASK |
3961                                                 CFG_RST_L_TPC_MASK));
3962
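                /* Give the device CPU time to go into halt (WFE) following the
                 * messages sent above, before asserting the full SW reset
                 */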
3963                 msleep(cpu_timeout_ms);
3964
3965                 /* Tell ASIC not to re-initialize PCIe */
3966                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
3967
3968                 /* Restart BTL/BLR upon hard-reset */
3969                 if (hdev->asic_prop.fw_security_disabled)
3970                         WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
3971
3972                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3973                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3974         }
3975
3976         dev_info(hdev->dev,
3977                 "Issued HARD reset command, going to wait %dms\n",
3978                 reset_timeout_ms);
3979
3980         /*
3981          * After hard reset, we can't poll the BTM_FSM register because the PSOC
3982          * itself is in reset. Need to wait until the reset is deasserted
3983          */
3984         msleep(reset_timeout_ms);
3985
3986         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3987         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3988                 dev_err(hdev->dev,
3989                         "Timeout while waiting for device to reset 0x%x\n",
3990                         status);
3991
3992         if (gaudi) {
3993                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3994                                 HW_CAP_HBM | HW_CAP_PCI_DMA |
3995                                 HW_CAP_MME | HW_CAP_TPC_MASK |
3996                                 HW_CAP_HBM_DMA | HW_CAP_PLL |
3997                                 HW_CAP_NIC_MASK | HW_CAP_MMU |
3998                                 HW_CAP_SRAM_SCRAMBLER |
3999                                 HW_CAP_HBM_SCRAMBLER |
4000                                 HW_CAP_CLK_GATE);
4001
4002                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4003         }
4004 }
4005
4006 static int gaudi_suspend(struct hl_device *hdev)
4007 {
4008         int rc;
4009
4010         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4011         if (rc)
4012                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4013
4014         return rc;
4015 }
4016
4017 static int gaudi_resume(struct hl_device *hdev)
4018 {
4019         return gaudi_init_iatu(hdev);
4020 }
4021
4022 static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4023                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4024 {
4025         int rc;
4026
4027         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4028                         VM_DONTCOPY | VM_NORESERVE;
4029
4030         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
4031         if (rc)
4032                 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4033
4034         return rc;
4035 }
4036
4037 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4038 {
4039         struct gaudi_device *gaudi = hdev->asic_specific;
4040         u32 db_reg_offset, db_value, dma_qm_offset, q_off;
4041         int dma_id;
4042         bool invalid_queue = false;
4043
4044         switch (hw_queue_id) {
4045         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4046                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4047                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4048                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4049                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4050                 break;
4051
4052         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4053                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4054                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4055                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4056                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4057                 break;
4058
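        /*
         * From DMA_2 onward the queue IDs are shifted by one because the CPU
         * PQ queue ID sits between the DMA_1 and DMA_2 ranges in the queue ID
         * enumeration, hence the "- 1" below so the low two bits again select
         * the stream within the QMAN.
         */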
4059         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4060                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4061                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4062                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4063                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4064                 break;
4065
4066         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4067                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4068                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4069                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4070                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4071                 break;
4072
4073         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4074                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4075                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4076                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4077                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4078                 break;
4079
4080         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4081                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4082                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4083                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4084                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4085                 break;
4086
4087         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4088                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4089                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4090                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4091                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4092                 break;
4093
4094         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4095                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4096                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4097                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4098                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4099                 break;
4100
4101         case GAUDI_QUEUE_ID_CPU_PQ:
4102                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4103                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4104                 else
4105                         invalid_queue = true;
4106                 break;
4107
4108         case GAUDI_QUEUE_ID_MME_0_0:
4109                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4110                 break;
4111
4112         case GAUDI_QUEUE_ID_MME_0_1:
4113                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4114                 break;
4115
4116         case GAUDI_QUEUE_ID_MME_0_2:
4117                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4118                 break;
4119
4120         case GAUDI_QUEUE_ID_MME_0_3:
4121                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4122                 break;
4123
4124         case GAUDI_QUEUE_ID_MME_1_0:
4125                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4126                 break;
4127
4128         case GAUDI_QUEUE_ID_MME_1_1:
4129                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4130                 break;
4131
4132         case GAUDI_QUEUE_ID_MME_1_2:
4133                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4134                 break;
4135
4136         case GAUDI_QUEUE_ID_MME_1_3:
4137                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4138                 break;
4139
4140         case GAUDI_QUEUE_ID_TPC_0_0:
4141                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4142                 break;
4143
4144         case GAUDI_QUEUE_ID_TPC_0_1:
4145                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4146                 break;
4147
4148         case GAUDI_QUEUE_ID_TPC_0_2:
4149                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4150                 break;
4151
4152         case GAUDI_QUEUE_ID_TPC_0_3:
4153                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4154                 break;
4155
4156         case GAUDI_QUEUE_ID_TPC_1_0:
4157                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4158                 break;
4159
4160         case GAUDI_QUEUE_ID_TPC_1_1:
4161                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4162                 break;
4163
4164         case GAUDI_QUEUE_ID_TPC_1_2:
4165                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4166                 break;
4167
4168         case GAUDI_QUEUE_ID_TPC_1_3:
4169                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4170                 break;
4171
4172         case GAUDI_QUEUE_ID_TPC_2_0:
4173                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4174                 break;
4175
4176         case GAUDI_QUEUE_ID_TPC_2_1:
4177                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4178                 break;
4179
4180         case GAUDI_QUEUE_ID_TPC_2_2:
4181                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4182                 break;
4183
4184         case GAUDI_QUEUE_ID_TPC_2_3:
4185                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4186                 break;
4187
4188         case GAUDI_QUEUE_ID_TPC_3_0:
4189                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4190                 break;
4191
4192         case GAUDI_QUEUE_ID_TPC_3_1:
4193                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4194                 break;
4195
4196         case GAUDI_QUEUE_ID_TPC_3_2:
4197                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4198                 break;
4199
4200         case GAUDI_QUEUE_ID_TPC_3_3:
4201                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4202                 break;
4203
4204         case GAUDI_QUEUE_ID_TPC_4_0:
4205                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4206                 break;
4207
4208         case GAUDI_QUEUE_ID_TPC_4_1:
4209                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4210                 break;
4211
4212         case GAUDI_QUEUE_ID_TPC_4_2:
4213                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4214                 break;
4215
4216         case GAUDI_QUEUE_ID_TPC_4_3:
4217                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4218                 break;
4219
4220         case GAUDI_QUEUE_ID_TPC_5_0:
4221                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4222                 break;
4223
4224         case GAUDI_QUEUE_ID_TPC_5_1:
4225                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4226                 break;
4227
4228         case GAUDI_QUEUE_ID_TPC_5_2:
4229                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4230                 break;
4231
4232         case GAUDI_QUEUE_ID_TPC_5_3:
4233                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4234                 break;
4235
4236         case GAUDI_QUEUE_ID_TPC_6_0:
4237                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4238                 break;
4239
4240         case GAUDI_QUEUE_ID_TPC_6_1:
4241                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4242                 break;
4243
4244         case GAUDI_QUEUE_ID_TPC_6_2:
4245                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4246                 break;
4247
4248         case GAUDI_QUEUE_ID_TPC_6_3:
4249                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4250                 break;
4251
4252         case GAUDI_QUEUE_ID_TPC_7_0:
4253                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4254                 break;
4255
4256         case GAUDI_QUEUE_ID_TPC_7_1:
4257                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4258                 break;
4259
4260         case GAUDI_QUEUE_ID_TPC_7_2:
4261                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4262                 break;
4263
4264         case GAUDI_QUEUE_ID_TPC_7_3:
4265                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4266                 break;
4267
4268         case GAUDI_QUEUE_ID_NIC_0_0:
4269                 db_reg_offset = mmNIC0_QM0_PQ_PI_0;
4270                 break;
4271
4272         case GAUDI_QUEUE_ID_NIC_0_1:
4273                 db_reg_offset = mmNIC0_QM0_PQ_PI_1;
4274                 break;
4275
4276         case GAUDI_QUEUE_ID_NIC_0_2:
4277                 db_reg_offset = mmNIC0_QM0_PQ_PI_2;
4278                 break;
4279
4280         case GAUDI_QUEUE_ID_NIC_0_3:
4281                 db_reg_offset = mmNIC0_QM0_PQ_PI_3;
4282                 break;
4283
4284         case GAUDI_QUEUE_ID_NIC_1_0:
4285                 db_reg_offset = mmNIC0_QM1_PQ_PI_0;
4286                 break;
4287
4288         case GAUDI_QUEUE_ID_NIC_1_1:
4289                 db_reg_offset = mmNIC0_QM1_PQ_PI_1;
4290                 break;
4291
4292         case GAUDI_QUEUE_ID_NIC_1_2:
4293                 db_reg_offset = mmNIC0_QM1_PQ_PI_2;
4294                 break;
4295
4296         case GAUDI_QUEUE_ID_NIC_1_3:
4297                 db_reg_offset = mmNIC0_QM1_PQ_PI_3;
4298                 break;
4299
4300         case GAUDI_QUEUE_ID_NIC_2_0:
4301                 db_reg_offset = mmNIC1_QM0_PQ_PI_0;
4302                 break;
4303
4304         case GAUDI_QUEUE_ID_NIC_2_1:
4305                 db_reg_offset = mmNIC1_QM0_PQ_PI_1;
4306                 break;
4307
4308         case GAUDI_QUEUE_ID_NIC_2_2:
4309                 db_reg_offset = mmNIC1_QM0_PQ_PI_2;
4310                 break;
4311
4312         case GAUDI_QUEUE_ID_NIC_2_3:
4313                 db_reg_offset = mmNIC1_QM0_PQ_PI_3;
4314                 break;
4315
4316         case GAUDI_QUEUE_ID_NIC_3_0:
4317                 db_reg_offset = mmNIC1_QM1_PQ_PI_0;
4318                 break;
4319
4320         case GAUDI_QUEUE_ID_NIC_3_1:
4321                 db_reg_offset = mmNIC1_QM1_PQ_PI_1;
4322                 break;
4323
4324         case GAUDI_QUEUE_ID_NIC_3_2:
4325                 db_reg_offset = mmNIC1_QM1_PQ_PI_2;
4326                 break;
4327
4328         case GAUDI_QUEUE_ID_NIC_3_3:
4329                 db_reg_offset = mmNIC1_QM1_PQ_PI_3;
4330                 break;
4331
4332         case GAUDI_QUEUE_ID_NIC_4_0:
4333                 db_reg_offset = mmNIC2_QM0_PQ_PI_0;
4334                 break;
4335
4336         case GAUDI_QUEUE_ID_NIC_4_1:
4337                 db_reg_offset = mmNIC2_QM0_PQ_PI_1;
4338                 break;
4339
4340         case GAUDI_QUEUE_ID_NIC_4_2:
4341                 db_reg_offset = mmNIC2_QM0_PQ_PI_2;
4342                 break;
4343
4344         case GAUDI_QUEUE_ID_NIC_4_3:
4345                 db_reg_offset = mmNIC2_QM0_PQ_PI_3;
4346                 break;
4347
4348         case GAUDI_QUEUE_ID_NIC_5_0:
4349                 db_reg_offset = mmNIC2_QM1_PQ_PI_0;
4350                 break;
4351
4352         case GAUDI_QUEUE_ID_NIC_5_1:
4353                 db_reg_offset = mmNIC2_QM1_PQ_PI_1;
4354                 break;
4355
4356         case GAUDI_QUEUE_ID_NIC_5_2:
4357                 db_reg_offset = mmNIC2_QM1_PQ_PI_2;
4358                 break;
4359
4360         case GAUDI_QUEUE_ID_NIC_5_3:
4361                 db_reg_offset = mmNIC2_QM1_PQ_PI_3;
4362                 break;
4363
4364         case GAUDI_QUEUE_ID_NIC_6_0:
4365                 db_reg_offset = mmNIC3_QM0_PQ_PI_0;
4366                 break;
4367
4368         case GAUDI_QUEUE_ID_NIC_6_1:
4369                 db_reg_offset = mmNIC3_QM0_PQ_PI_1;
4370                 break;
4371
4372         case GAUDI_QUEUE_ID_NIC_6_2:
4373                 db_reg_offset = mmNIC3_QM0_PQ_PI_2;
4374                 break;
4375
4376         case GAUDI_QUEUE_ID_NIC_6_3:
4377                 db_reg_offset = mmNIC3_QM0_PQ_PI_3;
4378                 break;
4379
4380         case GAUDI_QUEUE_ID_NIC_7_0:
4381                 db_reg_offset = mmNIC3_QM1_PQ_PI_0;
4382                 break;
4383
4384         case GAUDI_QUEUE_ID_NIC_7_1:
4385                 db_reg_offset = mmNIC3_QM1_PQ_PI_1;
4386                 break;
4387
4388         case GAUDI_QUEUE_ID_NIC_7_2:
4389                 db_reg_offset = mmNIC3_QM1_PQ_PI_2;
4390                 break;
4391
4392         case GAUDI_QUEUE_ID_NIC_7_3:
4393                 db_reg_offset = mmNIC3_QM1_PQ_PI_3;
4394                 break;
4395
4396         case GAUDI_QUEUE_ID_NIC_8_0:
4397                 db_reg_offset = mmNIC4_QM0_PQ_PI_0;
4398                 break;
4399
4400         case GAUDI_QUEUE_ID_NIC_8_1:
4401                 db_reg_offset = mmNIC4_QM0_PQ_PI_1;
4402                 break;
4403
4404         case GAUDI_QUEUE_ID_NIC_8_2:
4405                 db_reg_offset = mmNIC4_QM0_PQ_PI_2;
4406                 break;
4407
4408         case GAUDI_QUEUE_ID_NIC_8_3:
4409                 db_reg_offset = mmNIC4_QM0_PQ_PI_3;
4410                 break;
4411
4412         case GAUDI_QUEUE_ID_NIC_9_0:
4413                 db_reg_offset = mmNIC4_QM1_PQ_PI_0;
4414                 break;
4415
4416         case GAUDI_QUEUE_ID_NIC_9_1:
4417                 db_reg_offset = mmNIC4_QM1_PQ_PI_1;
4418                 break;
4419
4420         case GAUDI_QUEUE_ID_NIC_9_2:
4421                 db_reg_offset = mmNIC4_QM1_PQ_PI_2;
4422                 break;
4423
4424         case GAUDI_QUEUE_ID_NIC_9_3:
4425                 db_reg_offset = mmNIC4_QM1_PQ_PI_3;
4426                 break;
4427
4428         default:
4429                 invalid_queue = true;
4430         }
4431
4432         if (invalid_queue) {
4433                 /* Should never get here */
4434                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4435                         hw_queue_id);
4436                 return;
4437         }
4438
4439         db_value = pi;
4440
4441         /* ring the doorbell */
4442         WREG32(db_reg_offset, db_value);
4443
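        /* For the device CPU queue, also raise the PI_UPDATE interrupt so the
         * CPU-CP firmware will notice the new PI value
         */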
4444         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
4445                 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
4446                                 GAUDI_EVENT_PI_UPDATE);
4447 }
4448
4449 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4450                                 struct hl_bd *bd)
4451 {
4452         __le64 *pbd = (__le64 *) bd;
4453
4454         /* The QMAN PQs reside in host memory, so a simple copy suffices */
4455         pqe[0] = pbd[0];
4456         pqe[1] = pbd[1];
4457 }
4458
4459 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4460                                         dma_addr_t *dma_handle, gfp_t flags)
4461 {
4462         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4463                                                 dma_handle, flags);
4464
4465         /* Shift to the device's base physical address of host memory */
4466         if (kernel_addr)
4467                 *dma_handle += HOST_PHYS_BASE;
4468
4469         return kernel_addr;
4470 }
4471
4472 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4473                 void *cpu_addr, dma_addr_t dma_handle)
4474 {
4475         /* Cancel the device's base physical address of host memory */
4476         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4477
4478         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4479 }
4480
4481 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4482 {
4483         struct asic_fixed_properties *prop = &hdev->asic_prop;
4484         u64  cur_addr = DRAM_BASE_ADDR_USER;
4485         u32 val;
4486         u32 chunk_size;
4487         int rc, dma_id;
4488
4489         while (cur_addr < prop->dram_end_address) {
4490                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4491                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4492
4493                         chunk_size =
4494                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4495
4496                         dev_dbg(hdev->dev,
4497                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4498                                 cur_addr, cur_addr + chunk_size);
4499
4500                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
4501                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
4502                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4503                                                 lower_32_bits(cur_addr));
4504                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4505                                                 upper_32_bits(cur_addr));
4506                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4507                                         chunk_size);
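                        /* Kick the DMA core in memset mode (MEM_SET). The fill
                         * value is taken from the SRC_BASE registers, which
                         * were programmed to zero above, so the chunk is
                         * effectively scrubbed with zeros.
                         */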
4508                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4509                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4510                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4511
4512                         cur_addr += chunk_size;
4513
4514                         if (cur_addr == prop->dram_end_address)
4515                                 break;
4516                 }
4517
4518                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4519                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4520
4521                         rc = hl_poll_timeout(
4522                                 hdev,
4523                                 mmDMA0_CORE_STS0 + dma_offset,
4524                                 val,
4525                                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4526                                 1000,
4527                                 HBM_SCRUBBING_TIMEOUT_US);
4528
4529                         if (rc) {
4530                                 dev_err(hdev->dev,
4531                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4532                                         dma_id);
4533                                 return -EIO;
4534                         }
4535                 }
4536         }
4537
4538         return 0;
4539 }
4540
4541 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4542 {
4543         struct asic_fixed_properties *prop = &hdev->asic_prop;
4544         struct gaudi_device *gaudi = hdev->asic_specific;
4545         u64 idle_mask = 0;
4546         int rc = 0;
4547         u64 val = 0;
4548
4549         if (!hdev->memory_scrub)
4550                 return 0;
4551
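        /* addr == 0 and size == 0 means scrub all device memory: wait for the
         * device to become idle, then clear SRAM followed by HBM
         */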
4552         if (!addr && !size) {
4553                 /* Wait till device is idle */
4554                 rc = hl_poll_timeout(
4555                                 hdev,
4556                                 mmDMA0_CORE_STS0/* dummy */,
4557                                 val/* dummy */,
4558                                 (hdev->asic_funcs->is_device_idle(hdev,
4559                                                 &idle_mask, NULL)),
4560                                                 1000,
4561                                                 HBM_SCRUBBING_TIMEOUT_US);
4562                 if (rc) {
4563                         dev_err(hdev->dev, "waiting for idle timeout\n");
4564                         return -EIO;
4565                 }
4566
4567                 /* Scrub SRAM */
4568                 addr = prop->sram_user_base_address;
4569                 size = hdev->pldm ? 0x10000 :
4570                                 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4571                 val = 0x7777777777777777ull;
4572
4573                 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4574                 if (rc) {
4575                         dev_err(hdev->dev,
4576                                 "Failed to clear SRAM in mem scrub all\n");
4577                         return rc;
4578                 }
4579
4580                 mutex_lock(&gaudi->clk_gate_mutex);
4581                 hdev->asic_funcs->disable_clock_gating(hdev);
4582
4583                 /* Scrub HBM using all DMA channels in parallel */
4584                 rc = gaudi_hbm_scrubbing(hdev);
4585                 if (rc)
4586                         dev_err(hdev->dev,
4587                                 "Failed to clear HBM in mem scrub all\n");
4588
4589                 hdev->asic_funcs->set_clock_gating(hdev);
4590                 mutex_unlock(&gaudi->clk_gate_mutex);
4591         }
4592
4593         return rc;
4594 }
4595
4596 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4597                                 u32 queue_id, dma_addr_t *dma_handle,
4598                                 u16 *queue_len)
4599 {
4600         struct gaudi_device *gaudi = hdev->asic_specific;
4601         struct gaudi_internal_qman_info *q;
4602
4603         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4604                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4605                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4606                 return NULL;
4607         }
4608
4609         q = &gaudi->internal_qmans[queue_id];
4610         *dma_handle = q->pq_dma_addr;
4611         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4612
4613         return q->pq_kernel_addr;
4614 }
4615
4616 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4617                                 u16 len, u32 timeout, u64 *result)
4618 {
4619         struct gaudi_device *gaudi = hdev->asic_specific;
4620
4621         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4622                 if (result)
4623                         *result = 0;
4624                 return 0;
4625         }
4626
4627         if (!timeout)
4628                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4629
4630         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4631                                                 timeout, result);
4632 }
4633
4634 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4635 {
4636         struct packet_msg_prot *fence_pkt;
4637         dma_addr_t pkt_dma_addr;
4638         u32 fence_val, tmp, timeout_usec;
4639         dma_addr_t fence_dma_addr;
4640         u32 *fence_ptr;
4641         int rc;
4642
4643         if (hdev->pldm)
4644                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4645         else
4646                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4647
4648         fence_val = GAUDI_QMAN0_FENCE_VAL;
4649
4650         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4651                                                         &fence_dma_addr);
4652         if (!fence_ptr) {
4653                 dev_err(hdev->dev,
4654                         "Failed to allocate memory for H/W queue %d testing\n",
4655                         hw_queue_id);
4656                 return -ENOMEM;
4657         }
4658
4659         *fence_ptr = 0;
4660
4661         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4662                                         sizeof(struct packet_msg_prot),
4663                                         GFP_KERNEL, &pkt_dma_addr);
4664         if (!fence_pkt) {
4665                 dev_err(hdev->dev,
4666                         "Failed to allocate packet for H/W queue %d testing\n",
4667                         hw_queue_id);
4668                 rc = -ENOMEM;
4669                 goto free_fence_ptr;
4670         }
4671
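        /* Build a MSG_PROT packet that writes fence_val to fence_dma_addr.
         * EB (engine barrier) and MB (message barrier) are set so the write
         * is performed only after preceding transactions have completed.
         */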
4672         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4673         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4674         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4675
4676         fence_pkt->ctl = cpu_to_le32(tmp);
4677         fence_pkt->value = cpu_to_le32(fence_val);
4678         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4679
4680         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4681                                         sizeof(struct packet_msg_prot),
4682                                         pkt_dma_addr);
4683         if (rc) {
4684                 dev_err(hdev->dev,
4685                         "Failed to send fence packet to H/W queue %d\n",
4686                         hw_queue_id);
4687                 goto free_pkt;
4688         }
4689
4690         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4691                                         1000, timeout_usec, true);
4692
4693         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4694
4695         if (rc == -ETIMEDOUT) {
4696                 dev_err(hdev->dev,
4697                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4698                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4699                 rc = -EIO;
4700         }
4701
4702 free_pkt:
4703         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4704                                         pkt_dma_addr);
4705 free_fence_ptr:
4706         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4707                                         fence_dma_addr);
4708         return rc;
4709 }
4710
4711 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4712 {
4713         struct gaudi_device *gaudi = hdev->asic_specific;
4714
4715         /*
4716          * Check the CPU queue capability here, as send_cpu_message() won't
4717          * update the result value when the capability is missing
4718          */
4719         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4720                 return 0;
4721
4722         return hl_fw_test_cpu_queue(hdev);
4723 }
4724
4725 static int gaudi_test_queues(struct hl_device *hdev)
4726 {
4727         int i, rc, ret_val = 0;
4728
4729         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4730                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4731                         rc = gaudi_test_queue(hdev, i);
4732                         if (rc)
4733                                 ret_val = -EINVAL;
4734                 }
4735         }
4736
4737         rc = gaudi_test_cpu_queue(hdev);
4738         if (rc)
4739                 ret_val = -EINVAL;
4740
4741         return ret_val;
4742 }
4743
4744 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4745                 gfp_t mem_flags, dma_addr_t *dma_handle)
4746 {
4747         void *kernel_addr;
4748
4749         if (size > GAUDI_DMA_POOL_BLK_SIZE)
4750                 return NULL;
4751
4752         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4753
4754         /* Shift to the device's base physical address of host memory */
4755         if (kernel_addr)
4756                 *dma_handle += HOST_PHYS_BASE;
4757
4758         return kernel_addr;
4759 }
4760
4761 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4762                         dma_addr_t dma_addr)
4763 {
4764         /* Cancel the device's base physical address of host memory */
4765         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4766
4767         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4768 }
4769
4770 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4771                                         size_t size, dma_addr_t *dma_handle)
4772 {
4773         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4774 }
4775
4776 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4777                                                 size_t size, void *vaddr)
4778 {
4779         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4780 }
4781
4782 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
4783                         int nents, enum dma_data_direction dir)
4784 {
4785         struct scatterlist *sg;
4786         int i;
4787
4788         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
4789                 return -ENOMEM;
4790
4791         /* Shift to the device's base physical address of host memory */
4792         for_each_sg(sgl, sg, nents, i)
4793                 sg->dma_address += HOST_PHYS_BASE;
4794
4795         return 0;
4796 }
4797
4798 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
4799                         int nents, enum dma_data_direction dir)
4800 {
4801         struct scatterlist *sg;
4802         int i;
4803
4804         /* Cancel the device's base physical address of host memory */
4805         for_each_sg(sgl, sg, nents, i)
4806                 sg->dma_address -= HOST_PHYS_BASE;
4807
4808         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
4809 }
4810
4811 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
4812                                         struct sg_table *sgt)
4813 {
4814         struct scatterlist *sg, *sg_next_iter;
4815         u32 count, dma_desc_cnt;
4816         u64 len, len_next;
4817         dma_addr_t addr, addr_next;
4818
4819         dma_desc_cnt = 0;
4820
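        /* Count how many LIN_DMA packets the patched CB will need. Physically
         * contiguous SG entries are merged, up to DMA_MAX_TRANSFER_SIZE per
         * descriptor.
         */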
4821         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
4822
4823                 len = sg_dma_len(sg);
4824                 addr = sg_dma_address(sg);
4825
4826                 if (len == 0)
4827                         break;
4828
4829                 while ((count + 1) < sgt->nents) {
4830                         sg_next_iter = sg_next(sg);
4831                         len_next = sg_dma_len(sg_next_iter);
4832                         addr_next = sg_dma_address(sg_next_iter);
4833
4834                         if (len_next == 0)
4835                                 break;
4836
4837                         if ((addr + len == addr_next) &&
4838                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4839                                 len += len_next;
4840                                 count++;
4841                                 sg = sg_next_iter;
4842                         } else {
4843                                 break;
4844                         }
4845                 }
4846
4847                 dma_desc_cnt++;
4848         }
4849
4850         return dma_desc_cnt * sizeof(struct packet_lin_dma);
4851 }
4852
4853 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4854                                 struct hl_cs_parser *parser,
4855                                 struct packet_lin_dma *user_dma_pkt,
4856                                 u64 addr, enum dma_data_direction dir)
4857 {
4858         struct hl_userptr *userptr;
4859         int rc;
4860
4861         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4862                         parser->job_userptr_list, &userptr))
4863                 goto already_pinned;
4864
4865         userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
4866         if (!userptr)
4867                 return -ENOMEM;
4868
4869         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4870                                 userptr);
4871         if (rc)
4872                 goto free_userptr;
4873
4874         list_add_tail(&userptr->job_node, parser->job_userptr_list);
4875
4876         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
4877                                         userptr->sgt->nents, dir);
4878         if (rc) {
4879                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4880                 goto unpin_memory;
4881         }
4882
4883         userptr->dma_mapped = true;
4884         userptr->dir = dir;
4885
4886 already_pinned:
4887         parser->patched_cb_size +=
4888                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4889
4890         return 0;
4891
4892 unpin_memory:
4893         hl_unpin_host_memory(hdev, userptr);
4894 free_userptr:
4895         kfree(userptr);
4896         return rc;
4897 }
4898
4899 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4900                                 struct hl_cs_parser *parser,
4901                                 struct packet_lin_dma *user_dma_pkt,
4902                                 bool src_in_host)
4903 {
4904         enum dma_data_direction dir;
4905         bool skip_host_mem_pin = false, user_memset;
4906         u64 addr;
4907         int rc = 0;
4908
4909         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4910                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4911                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4912
4913         if (src_in_host) {
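                /* In memset mode the source field holds the fill value rather
                 * than a host address, so there is no host memory to pin
                 */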
4914                 if (user_memset)
4915                         skip_host_mem_pin = true;
4916
4917                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4918                 dir = DMA_TO_DEVICE;
4919                 addr = le64_to_cpu(user_dma_pkt->src_addr);
4920         } else {
4921                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4922                 dir = DMA_FROM_DEVICE;
4923                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4924                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4925                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4926         }
4927
4928         if (skip_host_mem_pin)
4929                 parser->patched_cb_size += sizeof(*user_dma_pkt);
4930         else
4931                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4932                                                 addr, dir);
4933
4934         return rc;
4935 }
4936
4937 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4938                                 struct hl_cs_parser *parser,
4939                                 struct packet_lin_dma *user_dma_pkt)
4940 {
4941         bool src_in_host = false;
4942         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4943                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4944                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4945
4946         dev_dbg(hdev->dev, "DMA packet details:\n");
4947         dev_dbg(hdev->dev, "source == 0x%llx\n",
4948                                 le64_to_cpu(user_dma_pkt->src_addr));
4949         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4950         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4951
4952         /*
4953          * Special handling for DMA with size 0. Bypass all validations
4954          * because no transactions will be done except for WR_COMP, which
4955          * is not a security issue
4956          */
4957         if (!le32_to_cpu(user_dma_pkt->tsize)) {
4958                 parser->patched_cb_size += sizeof(*user_dma_pkt);
4959                 return 0;
4960         }
4961
4962         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4963                 src_in_host = true;
4964
4965         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
4966                                                 src_in_host);
4967 }
4968
4969 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
4970                                         struct hl_cs_parser *parser,
4971                                         struct packet_load_and_exe *user_pkt)
4972 {
4973         u32 cfg;
4974
4975         cfg = le32_to_cpu(user_pkt->cfg);
4976
4977         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
4978                 dev_err(hdev->dev,
4979                         "User not allowed to use Load and Execute\n");
4980                 return -EPERM;
4981         }
4982
4983         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
4984
4985         return 0;
4986 }
4987
4988 static int gaudi_validate_cb(struct hl_device *hdev,
4989                         struct hl_cs_parser *parser, bool is_mmu)
4990 {
4991         u32 cb_parsed_length = 0;
4992         int rc = 0;
4993
4994         parser->patched_cb_size = 0;
4995
4996         /* user_cb_size is greater than 0, so the loop will always execute */
4997         while (cb_parsed_length < parser->user_cb_size) {
4998                 enum packet_id pkt_id;
4999                 u16 pkt_size;
5000                 struct gaudi_packet *user_pkt;
5001
5002                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5003
5004                 pkt_id = (enum packet_id) (
5005                                 (le64_to_cpu(user_pkt->header) &
5006                                 PACKET_HEADER_PACKET_ID_MASK) >>
5007                                         PACKET_HEADER_PACKET_ID_SHIFT);
5008
5009                 if (!validate_packet_id(pkt_id)) {
5010                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5011                         rc = -EINVAL;
5012                         break;
5013                 }
5014
5015                 pkt_size = gaudi_packet_sizes[pkt_id];
5016                 cb_parsed_length += pkt_size;
5017                 if (cb_parsed_length > parser->user_cb_size) {
5018                         dev_err(hdev->dev,
5019                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5020                         rc = -EINVAL;
5021                         break;
5022                 }
5023
5024                 switch (pkt_id) {
5025                 case PACKET_MSG_PROT:
5026                         dev_err(hdev->dev,
5027                                 "User not allowed to use MSG_PROT\n");
5028                         rc = -EPERM;
5029                         break;
5030
5031                 case PACKET_CP_DMA:
5032                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5033                         rc = -EPERM;
5034                         break;
5035
5036                 case PACKET_STOP:
5037                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5038                         rc = -EPERM;
5039                         break;
5040
5041                 case PACKET_WREG_BULK:
5042                         dev_err(hdev->dev,
5043                                 "User not allowed to use WREG_BULK\n");
5044                         rc = -EPERM;
5045                         break;
5046
5047                 case PACKET_LOAD_AND_EXE:
5048                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5049                                 (struct packet_load_and_exe *) user_pkt);
5050                         break;
5051
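                /* With an MMU-based parse the packet is later copied as-is
                 * (user addresses are device virtual addresses), so only its
                 * size is accounted for here. Without MMU the host buffer must
                 * be validated and pinned.
                 */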
5052                 case PACKET_LIN_DMA:
5053                         parser->contains_dma_pkt = true;
5054                         if (is_mmu)
5055                                 parser->patched_cb_size += pkt_size;
5056                         else
5057                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5058                                         (struct packet_lin_dma *) user_pkt);
5059                         break;
5060
5061                 case PACKET_WREG_32:
5062                 case PACKET_MSG_LONG:
5063                 case PACKET_MSG_SHORT:
5064                 case PACKET_REPEAT:
5065                 case PACKET_FENCE:
5066                 case PACKET_NOP:
5067                 case PACKET_ARB_POINT:
5068                         parser->patched_cb_size += pkt_size;
5069                         break;
5070
5071                 default:
5072                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5073                                 pkt_id);
5074                         rc = -EINVAL;
5075                         break;
5076                 }
5077
5078                 if (rc)
5079                         break;
5080         }
5081
5082         /*
5083          * The new CB should have space at the end for two MSG_PROT packets:
5084          * 1. A packet that will act as a completion packet
5085          * 2. A packet that will generate MSI-X interrupt
5086          */
5087         parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5088
5089         return rc;
5090 }
5091
5092 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5093                                 struct hl_cs_parser *parser,
5094                                 struct packet_lin_dma *user_dma_pkt,
5095                                 struct packet_lin_dma *new_dma_pkt,
5096                                 u32 *new_dma_pkt_size)
5097 {
5098         struct hl_userptr *userptr;
5099         struct scatterlist *sg, *sg_next_iter;
5100         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5101         u64 len, len_next;
5102         dma_addr_t dma_addr, dma_addr_next;
5103         u64 device_memory_addr, addr;
5104         enum dma_data_direction dir;
5105         struct sg_table *sgt;
5106         bool src_in_host = false;
5107         bool skip_host_mem_pin = false;
5108         bool user_memset;
5109
5110         ctl = le32_to_cpu(user_dma_pkt->ctl);
5111
5112         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5113                 src_in_host = true;
5114
5115         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5116                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5117
5118         if (src_in_host) {
5119                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5120                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5121                 dir = DMA_TO_DEVICE;
5122                 if (user_memset)
5123                         skip_host_mem_pin = true;
5124         } else {
5125                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5126                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5127                 dir = DMA_FROM_DEVICE;
5128         }
5129
5130         if ((!skip_host_mem_pin) &&
5131                 (!hl_userptr_is_pinned(hdev, addr,
5132                                         le32_to_cpu(user_dma_pkt->tsize),
5133                                         parser->job_userptr_list, &userptr))) {
5134                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5135                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5136                 return -EFAULT;
5137         }
5138
5139         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5140                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5141                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5142                 return 0;
5143         }
5144
5145         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5146
5147         sgt = userptr->sgt;
5148         dma_desc_cnt = 0;
5149
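        /* Expand the single user LIN_DMA packet into one packet per merged
         * chunk of the SG list. The engine barrier is kept only on the first
         * packet and WR_COMP is re-enabled only on the last one, further down.
         */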
5150         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5151                 len = sg_dma_len(sg);
5152                 dma_addr = sg_dma_address(sg);
5153
5154                 if (len == 0)
5155                         break;
5156
5157                 while ((count + 1) < sgt->nents) {
5158                         sg_next_iter = sg_next(sg);
5159                         len_next = sg_dma_len(sg_next_iter);
5160                         dma_addr_next = sg_dma_address(sg_next_iter);
5161
5162                         if (len_next == 0)
5163                                 break;
5164
5165                         if ((dma_addr + len == dma_addr_next) &&
5166                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5167                                 len += len_next;
5168                                 count++;
5169                                 sg = sg_next_iter;
5170                         } else {
5171                                 break;
5172                         }
5173                 }
5174
5175                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5176                 if (likely(dma_desc_cnt))
5177                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5178                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5179                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5180                 new_dma_pkt->tsize = cpu_to_le32(len);
5181
5182                 if (dir == DMA_TO_DEVICE) {
5183                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5184                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5185                 } else {
5186                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5187                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5188                 }
5189
5190                 if (!user_memset)
5191                         device_memory_addr += len;
5192                 dma_desc_cnt++;
5193                 new_dma_pkt++;
5194         }
5195
5196         if (!dma_desc_cnt) {
5197                         "No SG entries found when patching DMA packet\n");
5198                         "Error of 0 SG entries when patching DMA packet\n");
5199                 return -EFAULT;
5200         }
5201
5202         /* Fix the last dma packet - wrcomp must be as user set it */
5203         new_dma_pkt--;
5204         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5205
5206         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5207
5208         return 0;
5209 }
5210
5211 static int gaudi_patch_cb(struct hl_device *hdev,
5212                                 struct hl_cs_parser *parser)
5213 {
5214         u32 cb_parsed_length = 0;
5215         u32 cb_patched_cur_length = 0;
5216         int rc = 0;
5217
5218         /* user_cb_size is greater than 0, so the loop will always execute */
5219         while (cb_parsed_length < parser->user_cb_size) {
5220                 enum packet_id pkt_id;
5221                 u16 pkt_size;
5222                 u32 new_pkt_size = 0;
5223                 struct gaudi_packet *user_pkt, *kernel_pkt;
5224
5225                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5226                 kernel_pkt = parser->patched_cb->kernel_address +
5227                                         cb_patched_cur_length;
5228
5229                 pkt_id = (enum packet_id) (
5230                                 (le64_to_cpu(user_pkt->header) &
5231                                 PACKET_HEADER_PACKET_ID_MASK) >>
5232                                         PACKET_HEADER_PACKET_ID_SHIFT);
5233
5234                 if (!validate_packet_id(pkt_id)) {
5235                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5236                         rc = -EINVAL;
5237                         break;
5238                 }
5239
5240                 pkt_size = gaudi_packet_sizes[pkt_id];
5241                 cb_parsed_length += pkt_size;
5242                 if (cb_parsed_length > parser->user_cb_size) {
5243                         dev_err(hdev->dev,
5244                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5245                         rc = -EINVAL;
5246                         break;
5247                 }
5248
5249                 switch (pkt_id) {
5250                 case PACKET_LIN_DMA:
5251                         rc = gaudi_patch_dma_packet(hdev, parser,
5252                                         (struct packet_lin_dma *) user_pkt,
5253                                         (struct packet_lin_dma *) kernel_pkt,
5254                                         &new_pkt_size);
5255                         cb_patched_cur_length += new_pkt_size;
5256                         break;
5257
5258                 case PACKET_MSG_PROT:
5259                         dev_err(hdev->dev,
5260                                 "User not allowed to use MSG_PROT\n");
5261                         rc = -EPERM;
5262                         break;
5263
5264                 case PACKET_CP_DMA:
5265                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5266                         rc = -EPERM;
5267                         break;
5268
5269                 case PACKET_STOP:
5270                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5271                         rc = -EPERM;
5272                         break;
5273
5274                 case PACKET_WREG_32:
5275                 case PACKET_WREG_BULK:
5276                 case PACKET_MSG_LONG:
5277                 case PACKET_MSG_SHORT:
5278                 case PACKET_REPEAT:
5279                 case PACKET_FENCE:
5280                 case PACKET_NOP:
5281                 case PACKET_ARB_POINT:
5282                 case PACKET_LOAD_AND_EXE:
5283                         memcpy(kernel_pkt, user_pkt, pkt_size);
5284                         cb_patched_cur_length += pkt_size;
5285                         break;
5286
5287                 default:
5288                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5289                                 pkt_id);
5290                         rc = -EINVAL;
5291                         break;
5292                 }
5293
5294                 if (rc)
5295                         break;
5296         }
5297
5298         return rc;
5299 }
5300
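/*
 * gaudi_parse_cb_mmu() - with the MMU enabled, the user CB is copied as-is
 * into a kernel-owned patched CB (allocated with room for the two trailing
 * MSG_PROT packets) and that copy is validated. The patched CB handle is
 * destroyed before returning, so only the reference taken by cb_get, released
 * after the job completes, keeps the CB alive.
 */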
5301 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5302                 struct hl_cs_parser *parser)
5303 {
5304         u64 patched_cb_handle;
5305         u32 patched_cb_size;
5306         struct hl_cb *user_cb;
5307         int rc;
5308
5309         /*
5310          * The new CB should have space at the end for two MSG_PROT packets:
5311          * 1. A packet that will act as a completion packet
5312          * 2. A packet that will generate an MSI interrupt
5313          */
5314         parser->patched_cb_size = parser->user_cb_size +
5315                         sizeof(struct packet_msg_prot) * 2;
5316
5317         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5318                                 parser->patched_cb_size, false, false,
5319                                 &patched_cb_handle);
5320
5321         if (rc) {
5322                 dev_err(hdev->dev,
5323                         "Failed to allocate patched CB for DMA CS %d\n",
5324                         rc);
5325                 return rc;
5326         }
5327
5328         patched_cb_handle >>= PAGE_SHIFT;
5329         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5330                                 (u32) patched_cb_handle);
5331         /* hl_cb_get should never fail here so use kernel WARN */
5332         WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
5333                         (u32) patched_cb_handle);
5334         if (!parser->patched_cb) {
5335                 rc = -EFAULT;
5336                 goto out;
5337         }
5338
5339         /*
5340          * The check that parser->user_cb_size <= parser->user_cb->size was done
5341          * in validate_queue_index().
5342          */
5343         memcpy(parser->patched_cb->kernel_address,
5344                 parser->user_cb->kernel_address,
5345                 parser->user_cb_size);
5346
5347         patched_cb_size = parser->patched_cb_size;
5348
5349         /* Validate patched CB instead of user CB */
5350         user_cb = parser->user_cb;
5351         parser->user_cb = parser->patched_cb;
5352         rc = gaudi_validate_cb(hdev, parser, true);
5353         parser->user_cb = user_cb;
5354
5355         if (rc) {
5356                 hl_cb_put(parser->patched_cb);
5357                 goto out;
5358         }
5359
5360         if (patched_cb_size != parser->patched_cb_size) {
5361                 dev_err(hdev->dev, "patched CB size mismatch\n");
5362                 hl_cb_put(parser->patched_cb);
5363                 rc = -EINVAL;
5364                 goto out;
5365         }
5366
5367 out:
5368         /*
5369          * Always call cb destroy here because we still hold one reference
5370          * to it from the earlier cb_get. After the job is completed,
5371          * cb_put will release it, but here we want to remove it from the
5372          * idr.
5373          */
5374         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5375                                         patched_cb_handle << PAGE_SHIFT);
5376
5377         return rc;
5378 }
5379
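/*
 * gaudi_parse_cb_no_mmu() - with the MMU disabled, the user CB is validated
 * first, then a kernel-owned patched CB of parser->patched_cb_size bytes is
 * allocated and filled by gaudi_patch_cb(). On any failure the job's userptr
 * list is released here.
 */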
5380 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5381                 struct hl_cs_parser *parser)
5382 {
5383         u64 patched_cb_handle;
5384         int rc;
5385
5386         rc = gaudi_validate_cb(hdev, parser, false);
5387
5388         if (rc)
5389                 goto free_userptr;
5390
5391         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5392                                 parser->patched_cb_size, false, false,
5393                                 &patched_cb_handle);
5394         if (rc) {
5395                 dev_err(hdev->dev,
5396                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5397                 goto free_userptr;
5398         }
5399
5400         patched_cb_handle >>= PAGE_SHIFT;
5401         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5402                                 (u32) patched_cb_handle);
5403         /* hl_cb_get should never fail here so use kernel WARN */
5404         WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
5405                         (u32) patched_cb_handle);
5406         if (!parser->patched_cb) {
5407                 rc = -EFAULT;
5408                 goto out;
5409         }
5410
5411         rc = gaudi_patch_cb(hdev, parser);
5412
5413         if (rc)
5414                 hl_cb_put(parser->patched_cb);
5415
5416 out:
5417         /*
5418          * Always call cb destroy here because we still hold one reference
5419          * to it from the earlier cb_get. After the job is completed,
5420          * cb_put will release it, but here we want to remove it from the
5421          * idr.
5422          */
5423         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5424                                 patched_cb_handle << PAGE_SHIFT);
5425
5426 free_userptr:
5427         if (rc)
5428                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5429         return rc;
5430 }
5431
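/*
 * gaudi_parse_cb_no_ext_queue() - CBs for internal queues are not patched;
 * the job is accepted only if the target NIC queue (when relevant) is enabled
 * and the CB address range lies entirely inside the user SRAM, user DRAM or
 * (H)PMMU virtual address ranges.
 */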
5432 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5433                                         struct hl_cs_parser *parser)
5434 {
5435         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5436         struct gaudi_device *gaudi = hdev->asic_specific;
5437         u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5438                 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5439
5440         if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5441                         (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5442                         (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5443                 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5444                                 parser->hw_queue_id);
5445                 return -EINVAL;
5446         }
5447
5448         /* For internal queue jobs, just check that the CB address is valid */
5449         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5450                                         parser->user_cb_size,
5451                                         asic_prop->sram_user_base_address,
5452                                         asic_prop->sram_end_address))
5453                 return 0;
5454
5455         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5456                                         parser->user_cb_size,
5457                                         asic_prop->dram_user_base_address,
5458                                         asic_prop->dram_end_address))
5459                 return 0;
5460
5461         /* PMMU and HPMMU addresses are equal, check only one of them */
5462         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5463                                         parser->user_cb_size,
5464                                         asic_prop->pmmu.start_addr,
5465                                         asic_prop->pmmu.end_addr))
5466                 return 0;
5467
5468         dev_err(hdev->dev,
5469                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5470                 parser->user_cb, parser->user_cb_size);
5471
5472         return -EFAULT;
5473 }
5474
5475 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5476 {
5477         struct gaudi_device *gaudi = hdev->asic_specific;
5478
5479         if (parser->queue_type == QUEUE_TYPE_INT)
5480                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5481
5482         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5483                 return gaudi_parse_cb_mmu(hdev, parser);
5484         else
5485                 return gaudi_parse_cb_no_mmu(hdev, parser);
5486 }
5487
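/*
 * gaudi_add_end_of_cb_packets() - fill the two MSG_PROT packets reserved at
 * the end of the CB: the first writes the completion value to the CQ and the
 * second writes 1 to the PCIe MSI register to raise the interrupt (vector 0
 * is always used when multi-MSI mode is disabled).
 */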
5488 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5489                                         void *kernel_address, u32 len,
5490                                         u64 cq_addr, u32 cq_val, u32 msi_vec,
5491                                         bool eb)
5492 {
5493         struct gaudi_device *gaudi = hdev->asic_specific;
5494         struct packet_msg_prot *cq_pkt;
5495         u32 tmp;
5496
5497         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5498
5499         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5500         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5501
5502         if (eb)
5503                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5504
5505         cq_pkt->ctl = cpu_to_le32(tmp);
5506         cq_pkt->value = cpu_to_le32(cq_val);
5507         cq_pkt->addr = cpu_to_le64(cq_addr);
5508
5509         cq_pkt++;
5510
5511         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5512         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5513         cq_pkt->ctl = cpu_to_le32(tmp);
5514         cq_pkt->value = cpu_to_le32(1);
5515
5516         if (!gaudi->multi_msi_mode)
5517                 msi_vec = 0;
5518
5519         cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5520 }
5521
5522 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5523 {
5524         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5525 }
5526
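/*
 * gaudi_memset_device_memory() - build a single memset LIN_DMA packet inside
 * a kernel CB and run it as a driver job on QMAN0 (DMA channel 0). DMA0
 * error-cause bits are cleared before the transfer while the device is still
 * initializing, and checked again after it completes.
 */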
5527 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5528                                         u32 size, u64 val)
5529 {
5530         struct packet_lin_dma *lin_dma_pkt;
5531         struct hl_cs_job *job;
5532         u32 cb_size, ctl, err_cause;
5533         struct hl_cb *cb;
5534         int rc;
5535
5536         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5537         if (!cb)
5538                 return -EFAULT;
5539
5540         lin_dma_pkt = cb->kernel_address;
5541         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5542         cb_size = sizeof(*lin_dma_pkt);
5543
5544         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5545         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5546         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5547         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5548         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5549
5550         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5551         lin_dma_pkt->src_addr = cpu_to_le64(val);
5552         lin_dma_pkt->dst_addr = cpu_to_le64(addr);
5553         lin_dma_pkt->tsize = cpu_to_le32(size);
5554
5555         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5556         if (!job) {
5557                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5558                 rc = -ENOMEM;
5559                 goto release_cb;
5560         }
5561
5562         /* Verify DMA is OK */
5563         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5564         if (err_cause && !hdev->init_done) {
5565                 dev_dbg(hdev->dev,
5566                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5567                         err_cause);
5568                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5569         }
5570
5571         job->id = 0;
5572         job->user_cb = cb;
5573         atomic_inc(&job->user_cb->cs_cnt);
5574         job->user_cb_size = cb_size;
5575         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5576         job->patched_cb = job->user_cb;
5577         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5578
5579         hl_debugfs_add_job(hdev, job);
5580
5581         rc = gaudi_send_job_on_qman0(hdev, job);
5582         hl_debugfs_remove_job(hdev, job);
5583         kfree(job);
5584         atomic_dec(&cb->cs_cnt);
5585
5586         /* Verify DMA is OK */
5587         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5588         if (err_cause) {
5589                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5590                 rc = -EIO;
5591                 if (!hdev->init_done) {
5592                         dev_dbg(hdev->dev,
5593                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5594                                 err_cause);
5595                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5596                 }
5597         }
5598
5599 release_cb:
5600         hl_cb_put(cb);
5601         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5602
5603         return rc;
5604 }
5605
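/*
 * gaudi_restore_sm_registers() - clear the sync objects and monitor status
 * registers of the sync manager blocks. In the W_S block, objects and
 * monitors below the first available index are reserved by the driver and
 * are left untouched.
 */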
5606 static void gaudi_restore_sm_registers(struct hl_device *hdev)
5607 {
5608         int i;
5609
5610         for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
5611                 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
5612                 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
5613                 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
5614         }
5615
5616         for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
5617                 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
5618                 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
5619                 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
5620         }
5621
5622         i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
5623
5624         for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
5625                 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
5626
5627         i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
5628
5629         for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
5630                 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
5631 }
5632
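/*
 * gaudi_restore_dma_registers() - point each DMA core's write-completion
 * address back at its own sync object and restore WR_AWUSER_31_11 on the
 * compute DMA channels (2-7), which the user may have modified for SRAM
 * reduction.
 */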
5633 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5634 {
5635         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5636                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5637         int i;
5638
5639         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5640                 u64 sob_addr = CFG_BASE +
5641                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5642                                 (i * sob_delta);
5643                 u32 dma_offset = i * DMA_CORE_OFFSET;
5644
5645                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5646                                 lower_32_bits(sob_addr));
5647                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5648                                 upper_32_bits(sob_addr));
5649                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5650
5651                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5652                  * modified by the user for SRAM reduction
5653                  */
5654                 if (i > 1)
5655                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5656                                                                 0x00000001);
5657         }
5658 }
5659
5660 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5661 {
5662         u32 qman_offset;
5663         int i;
5664
5665         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5666                 qman_offset = i * DMA_QMAN_OFFSET;
5667                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5668         }
5669
5670         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5671                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5672                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5673         }
5674
5675         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5676                 qman_offset = i * TPC_QMAN_OFFSET;
5677                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5678         }
5679
5680         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5681                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5682                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5683                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5684         }
5685 }
5686
5687 static void gaudi_restore_user_registers(struct hl_device *hdev)
5688 {
5689         gaudi_restore_sm_registers(hdev);
5690         gaudi_restore_dma_registers(hdev);
5691         gaudi_restore_qm_registers(hdev);
5692 }
5693
5694 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5695 {
5696         gaudi_restore_user_registers(hdev);
5697
5698         return 0;
5699 }
5700
5701 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5702 {
5703         struct asic_fixed_properties *prop = &hdev->asic_prop;
5704         struct gaudi_device *gaudi = hdev->asic_specific;
5705         u64 addr = prop->mmu_pgt_addr;
5706         u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
5707
5708         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5709                 return 0;
5710
5711         return gaudi_memset_device_memory(hdev, addr, size, 0);
5712 }
5713
5714 static void gaudi_restore_phase_topology(struct hl_device *hdev)
5715 {
5716
5717 }
5718
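/*
 * The debugfs access helpers below dispatch by address range: configuration
 * space is accessed through RREG32/WREG32 (unless debugfs clock gating
 * protection blocks it), SRAM through its PCI BAR, DRAM/HBM through the
 * sliding HBM BAR, and host physical memory directly when no IOMMU is
 * present.
 */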
5719 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
5720 {
5721         struct asic_fixed_properties *prop = &hdev->asic_prop;
5722         struct gaudi_device *gaudi = hdev->asic_specific;
5723         u64 hbm_bar_addr;
5724         int rc = 0;
5725
5726         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
5727
5728                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5729                                 (hdev->clock_gating_mask &
5730                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5731
5732                         dev_err_ratelimited(hdev->dev,
5733                                 "Can't read register - clock gating is enabled!\n");
5734                         rc = -EFAULT;
5735                 } else {
5736                         *val = RREG32(addr - CFG_BASE);
5737                 }
5738
5739         } else if ((addr >= SRAM_BASE_ADDR) &&
5740                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
5741                 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
5742                                 (addr - SRAM_BASE_ADDR));
5743         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
5744                 u64 bar_base_addr = DRAM_PHYS_BASE +
5745                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5746
5747                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5748                 if (hbm_bar_addr != U64_MAX) {
5749                         *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
5750                                                 (addr - bar_base_addr));
5751
5752                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5753                                                 hbm_bar_addr);
5754                 }
5755                 if (hbm_bar_addr == U64_MAX)
5756                         rc = -EIO;
5757         } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
5758                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
5759         } else {
5760                 rc = -EFAULT;
5761         }
5762
5763         return rc;
5764 }
5765
5766 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
5767 {
5768         struct asic_fixed_properties *prop = &hdev->asic_prop;
5769         struct gaudi_device *gaudi = hdev->asic_specific;
5770         u64 hbm_bar_addr;
5771         int rc = 0;
5772
5773         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
5774
5775                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5776                                 (hdev->clock_gating_mask &
5777                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5778
5779                         dev_err_ratelimited(hdev->dev,
5780                                 "Can't write register - clock gating is enabled!\n");
5781                         rc = -EFAULT;
5782                 } else {
5783                         WREG32(addr - CFG_BASE, val);
5784                 }
5785
5786         } else if ((addr >= SRAM_BASE_ADDR) &&
5787                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
5788                 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
5789                                         (addr - SRAM_BASE_ADDR));
5790         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
5791                 u64 bar_base_addr = DRAM_PHYS_BASE +
5792                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5793
5794                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5795                 if (hbm_bar_addr != U64_MAX) {
5796                         writel(val, hdev->pcie_bar[HBM_BAR_ID] +
5797                                                 (addr - bar_base_addr));
5798
5799                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5800                                                 hbm_bar_addr);
5801                 }
5802                 if (hbm_bar_addr == U64_MAX)
5803                         rc = -EIO;
5804         } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
5805                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
5806         } else {
5807                 rc = -EFAULT;
5808         }
5809
5810         return rc;
5811 }
5812
5813 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
5814 {
5815         struct asic_fixed_properties *prop = &hdev->asic_prop;
5816         struct gaudi_device *gaudi = hdev->asic_specific;
5817         u64 hbm_bar_addr;
5818         int rc = 0;
5819
5820         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
5821
5822                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5823                                 (hdev->clock_gating_mask &
5824                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5825
5826                         dev_err_ratelimited(hdev->dev,
5827                                 "Can't read register - clock gating is enabled!\n");
5828                         rc = -EFAULT;
5829                 } else {
5830                         u32 val_l = RREG32(addr - CFG_BASE);
5831                         u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
5832
5833                         *val = (((u64) val_h) << 32) | val_l;
5834                 }
5835
5836         } else if ((addr >= SRAM_BASE_ADDR) &&
5837                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
5838                 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
5839                                 (addr - SRAM_BASE_ADDR));
5840         } else if (addr <=
5841                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
5842                 u64 bar_base_addr = DRAM_PHYS_BASE +
5843                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5844
5845                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5846                 if (hbm_bar_addr != U64_MAX) {
5847                         *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
5848                                                 (addr - bar_base_addr));
5849
5850                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5851                                                 hbm_bar_addr);
5852                 }
5853                 if (hbm_bar_addr == U64_MAX)
5854                         rc = -EIO;
5855         } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
5856                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
5857         } else {
5858                 rc = -EFAULT;
5859         }
5860
5861         return rc;
5862 }
5863
5864 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
5865 {
5866         struct asic_fixed_properties *prop = &hdev->asic_prop;
5867         struct gaudi_device *gaudi = hdev->asic_specific;
5868         u64 hbm_bar_addr;
5869         int rc = 0;
5870
5871         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
5872
5873                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
5874                                 (hdev->clock_gating_mask &
5875                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
5876
5877                         dev_err_ratelimited(hdev->dev,
5878                                 "Can't write register - clock gating is enabled!\n");
5879                         rc = -EFAULT;
5880                 } else {
5881                         WREG32(addr - CFG_BASE, lower_32_bits(val));
5882                         WREG32(addr + sizeof(u32) - CFG_BASE,
5883                                 upper_32_bits(val));
5884                 }
5885
5886         } else if ((addr >= SRAM_BASE_ADDR) &&
5887                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
5888                 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
5889                                         (addr - SRAM_BASE_ADDR));
5890         } else if (addr <=
5891                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
5892                 u64 bar_base_addr = DRAM_PHYS_BASE +
5893                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
5894
5895                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
5896                 if (hbm_bar_addr != U64_MAX) {
5897                         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
5898                                                 (addr - bar_base_addr));
5899
5900                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
5901                                                 hbm_bar_addr);
5902                 }
5903                 if (hbm_bar_addr == U64_MAX)
5904                         rc = -EIO;
5905         } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
5906                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
5907         } else {
5908                 rc = -EFAULT;
5909         }
5910
5911         return rc;
5912 }
5913
5914 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
5915 {
5916         struct gaudi_device *gaudi = hdev->asic_specific;
5917
5918         if (hdev->hard_reset_pending)
5919                 return U64_MAX;
5920
5921         return readq(hdev->pcie_bar[HBM_BAR_ID] +
5922                         (addr - gaudi->hbm_bar_cur_addr));
5923 }
5924
5925 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
5926 {
5927         struct gaudi_device *gaudi = hdev->asic_specific;
5928
5929         if (hdev->hard_reset_pending)
5930                 return;
5931
5932         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
5933                         (addr - gaudi->hbm_bar_cur_addr));
5934 }
5935
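/*
 * gaudi_mmu_prepare_reg() - program one engine user register with the given
 * ASID: the low 11 bits (MMBP + ASID) are cleared and the ASID is OR'ed in.
 * gaudi_mmu_prepare() below applies this to the QMAN, DMA core, TPC, MME and
 * enabled NIC user registers.
 */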
5936 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
5937 {
5938         /* mask to zero the MMBP and ASID bits */
5939         WREG32_AND(reg, ~0x7FF);
5940         WREG32_OR(reg, asid);
5941 }
5942
5943 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
5944 {
5945         struct gaudi_device *gaudi = hdev->asic_specific;
5946
5947         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5948                 return;
5949
5950         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
5951                 WARN(1, "asid %u is too big\n", asid);
5952                 return;
5953         }
5954
5955         mutex_lock(&gaudi->clk_gate_mutex);
5956
5957         hdev->asic_funcs->disable_clock_gating(hdev);
5958
5959         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
5960         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
5961         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
5962         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
5963         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
5964
5965         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
5966         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
5967         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
5968         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
5969         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
5970
5971         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
5972         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
5973         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
5974         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
5975         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
5976
5977         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
5978         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
5979         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
5980         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
5981         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
5982
5983         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
5984         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
5985         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
5986         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
5987         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
5988
5989         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
5990         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
5991         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
5992         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
5993         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
5994
5995         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
5996         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
5997         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
5998         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
5999         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6000
6001         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6002         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6003         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6004         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6005         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6006
6007         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6008         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6009         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6010         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6011         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6012         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6013         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6014         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6015
6016         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6017         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6018         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6019         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6020         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6021         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6022         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6023
6024         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6025         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6026         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6027         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6028         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6029         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6030         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6031
6032         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6033         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6034         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6035         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6036         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6037         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6038         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6039
6040         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6041         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6042         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6043         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6044         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6045         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6046         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6047
6048         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6049         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6050         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6051         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6052         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6053         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6054         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6055
6056         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6057         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6058         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6059         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6060         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6061         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6062         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6063
6064         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6065         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6066         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6067         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6068         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6069         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6070         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6071
6072         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6073         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6074         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6075         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6076         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6077         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6078         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6079
6080         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6081         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6082         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6083         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6084         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6085         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6086         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6087         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6088         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6089         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6090
6091         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6092         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6093         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6094         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6095         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6096         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6097         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6098         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6099         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6100         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6101         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6102         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6103
6104         if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC0) {
6105                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6106                                 asid);
6107                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6108                                 asid);
6109                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6110                                 asid);
6111                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6112                                 asid);
6113                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6114                                 asid);
6115         }
6116
6117         if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC1) {
6118                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6119                                 asid);
6120                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6121                                 asid);
6122                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6123                                 asid);
6124                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6125                                 asid);
6126                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6127                                 asid);
6128         }
6129
6130         if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC2) {
6131                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6132                                 asid);
6133                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6134                                 asid);
6135                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6136                                 asid);
6137                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6138                                 asid);
6139                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6140                                 asid);
6141         }
6142
6143         if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC3) {
6144                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6145                                 asid);
6146                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6147                                 asid);
6148                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6149                                 asid);
6150                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6151                                 asid);
6152                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6153                                 asid);
6154         }
6155
6156         if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC4) {
6157                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6158                                 asid);
6159                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6160                                 asid);
6161                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6162                                 asid);
6163                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6164                                 asid);
6165                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6166                                 asid);
6167         }
6168
6169         if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC5) {
6170                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6171                                 asid);
6172                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6173                                 asid);
6174                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6175                                 asid);
6176                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6177                                 asid);
6178                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6179                                 asid);
6180         }
6181
6182         if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC6) {
6183                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6184                                 asid);
6185                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6186                                 asid);
6187                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6188                                 asid);
6189                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6190                                 asid);
6191                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6192                                 asid);
6193         }
6194
6195         if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC7) {
6196                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6197                                 asid);
6198                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6199                                 asid);
6200                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6201                                 asid);
6202                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6203                                 asid);
6204                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6205                                 asid);
6206         }
6207
6208         if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC8) {
6209                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6210                                 asid);
6211                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6212                                 asid);
6213                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6214                                 asid);
6215                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6216                                 asid);
6217                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6218                                 asid);
6219         }
6220
6221         if (hdev->nic_ports_mask & GAUDI_NIC_MASK_NIC9) {
6222                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6223                                 asid);
6224                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6225                                 asid);
6226                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6227                                 asid);
6228                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6229                                 asid);
6230                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6231                                 asid);
6232         }
6233
6234         hdev->asic_funcs->set_clock_gating(hdev);
6235
6236         mutex_unlock(&gaudi->clk_gate_mutex);
6237 }
6238
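/*
 * gaudi_send_job_on_qman0() - run a driver-internal job on the PCI DMA queue:
 * a trailing MSG_PROT packet writes a fence value into a coherent DMA buffer,
 * the DMA0 core protection bit is set for the duration of the job, the CB is
 * sent without a completion entry and the fence location is polled until the
 * expected value arrives or the timeout expires.
 */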
6239 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6240                 struct hl_cs_job *job)
6241 {
6242         struct packet_msg_prot *fence_pkt;
6243         u32 *fence_ptr;
6244         dma_addr_t fence_dma_addr;
6245         struct hl_cb *cb;
6246         u32 tmp, timeout, dma_offset;
6247         int rc;
6248
6249         if (hdev->pldm)
6250                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6251         else
6252                 timeout = HL_DEVICE_TIMEOUT_USEC;
6253
6254         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
6255                 dev_err_ratelimited(hdev->dev,
6256                         "Can't send driver job on QMAN0 because the device is not idle\n");
6257                 return -EBUSY;
6258         }
6259
6260         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6261                                                         &fence_dma_addr);
6262         if (!fence_ptr) {
6263                 dev_err(hdev->dev,
6264                         "Failed to allocate fence memory for QMAN0\n");
6265                 return -ENOMEM;
6266         }
6267
6268         cb = job->patched_cb;
6269
6270         fence_pkt = cb->kernel_address +
6271                         job->job_cb_size - sizeof(struct packet_msg_prot);
6272
6273         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6274         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6275         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6276
6277         fence_pkt->ctl = cpu_to_le32(tmp);
6278         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6279         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6280
6281         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6282
6283         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6284
6285         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6286                                         job->job_cb_size, cb->bus_address);
6287         if (rc) {
6288                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6289                 goto free_fence_ptr;
6290         }
6291
6292         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6293                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6294                                 timeout, true);
6295
6296         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6297
6298         if (rc == -ETIMEDOUT) {
6299                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6300                 goto free_fence_ptr;
6301         }
6302
6303 free_fence_ptr:
6304         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6305                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6306
6307         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6308                                         fence_dma_addr);
6309         return rc;
6310 }
6311
6312 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6313 {
6314         if (event_type >= GAUDI_EVENT_SIZE)
6315                 goto event_not_supported;
6316
6317         if (!gaudi_irq_map_table[event_type].valid)
6318                 goto event_not_supported;
6319
6320         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6321
6322         return;
6323
6324 event_not_supported:
6325         snprintf(desc, size, "N/A");
6326 }
6327
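/*
 * gaudi_get_razwi_initiator_dma_name() - the reported initiator location can
 * belong to either of two DMA engines, so the HBW read/write error-cause bits
 * of both candidates are inspected to decide which engine (or possibly either
 * one) triggered the RAZWI.
 */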
6328 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6329                                                         u32 x_y, bool is_write)
6330 {
6331         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6332
6333         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6334                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6335
6336         switch (x_y) {
6337         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6338         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6339                 dma_id[0] = 0;
6340                 dma_id[1] = 2;
6341                 break;
6342         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6343         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6344                 dma_id[0] = 1;
6345                 dma_id[1] = 3;
6346                 break;
6347         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6348         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6349                 dma_id[0] = 4;
6350                 dma_id[1] = 6;
6351                 break;
6352         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6353         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6354                 dma_id[0] = 5;
6355                 dma_id[1] = 7;
6356                 break;
6357         default:
6358                 goto unknown_initiator;
6359         }
6360
6361         for (i = 0 ; i < 2 ; i++) {
6362                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6363                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6364         }
6365
6366         switch (x_y) {
6367         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6368         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6369                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6370                         return "DMA0";
6371                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6372                         return "DMA2";
6373                 else
6374                         return "DMA0 or DMA2";
6375         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6376         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6377                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6378                         return "DMA1";
6379                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6380                         return "DMA3";
6381                 else
6382                         return "DMA1 or DMA3";
6383         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6384         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6385                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6386                         return "DMA4";
6387                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6388                         return "DMA6";
6389                 else
6390                         return "DMA4 or DMA6";
6391         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6392         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6393                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6394                         return "DMA5";
6395                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6396                         return "DMA7";
6397                 else
6398                         return "DMA5 or DMA7";
6399         }
6400
6401 unknown_initiator:
6402         return "unknown initiator";
6403 }
6404
6405 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
6406                                                         bool is_write)
6407 {
6408         u32 val, x_y, axi_id;
6409
6410         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6411                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
6412         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6413                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6414         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6415                         RAZWI_INITIATOR_AXI_ID_SHIFT);
6416
6417         switch (x_y) {
6418         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6419                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6420                         return "TPC0";
6421                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6422                         return "NIC0";
6423                 break;
6424         case RAZWI_INITIATOR_ID_X_Y_TPC1:
6425                 return "TPC1";
6426         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6427         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6428                 return "MME0";
6429         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6430         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6431                 return "MME1";
6432         case RAZWI_INITIATOR_ID_X_Y_TPC2:
6433                 return "TPC2";
6434         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6435                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6436                         return "TPC3";
6437                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6438                         return "PCI";
6439                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6440                         return "CPU";
6441                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6442                         return "PSOC";
6443                 break;
6444         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6445         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6446         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6447         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6448         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6449         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6450         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6451         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6452                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
6453         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6454                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6455                         return "TPC4";
6456                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6457                         return "NIC1";
6458                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6459                         return "NIC2";
6460                 break;
6461         case RAZWI_INITIATOR_ID_X_Y_TPC5:
6462                 return "TPC5";
6463         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6464         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6465                 return "MME2";
6466         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6467         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6468                 return "MME3";
6469         case RAZWI_INITIATOR_ID_X_Y_TPC6:
6470                 return "TPC6";
6471         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6472                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6473                         return "TPC7";
6474                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6475                         return "NIC4";
6476                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6477                         return "NIC5";
6478                 break;
6479         default:
6480                 break;
6481         }
6482
6483         dev_err(hdev->dev,
6484                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6485                 val,
6486                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6487                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6488                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6489                         RAZWI_INITIATOR_AXI_ID_MASK);
6490
6491         return "unknown initiator";
6492 }
6493
6494 static void gaudi_print_razwi_info(struct hl_device *hdev)
6495 {
6496         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6497                 dev_err_ratelimited(hdev->dev,
6498                         "RAZWI event caused by illegal write of %s\n",
6499                         gaudi_get_razwi_initiator_name(hdev, true));
6500                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6501         }
6502
6503         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6504                 dev_err_ratelimited(hdev->dev,
6505                         "RAZWI event caused by illegal read of %s\n",
6506                         gaudi_get_razwi_initiator_name(hdev, false));
6507                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6508         }
6509 }
6510
6511 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
6512 {
6513         struct gaudi_device *gaudi = hdev->asic_specific;
6514         u64 addr;
6515         u32 val;
6516
6517         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6518                 return;
6519
6520         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6521         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6522                 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6523                 addr <<= 32;
6524                 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6525
6526                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
6527                                         addr);
6528
6529                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6530         }
6531
6532         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6533         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6534                 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6535                 addr <<= 32;
6536                 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6537
6538                 dev_err_ratelimited(hdev->dev,
6539                                 "MMU access error on va 0x%llx\n", addr);
6540
6541                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6542         }
6543 }
6544
6545 /*
6546  *  +-------------------+------------------------------------------------------+
6547  *  | Configuration Reg |                     Description                      |
6548  *  |      Address      |                                                      |
6549  *  +-------------------+------------------------------------------------------+
6550  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6551  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6552  *  |                   |0xF34 memory wrappers 63:32                           |
6553  *  |                   |0xF38 memory wrappers 95:64                           |
6554  *  |                   |0xF3C memory wrappers 127:96                          |
6555  *  +-------------------+------------------------------------------------------+
6556  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
6557  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6558  *  |                   |0xF44 memory wrappers 63:32                           |
6559  *  |                   |0xF48 memory wrappers 95:64                           |
6560  *  |                   |0xF4C memory wrappers 127:96                          |
6561  *  +-------------------+------------------------------------------------------+
6562  */
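/*
 * The helper below scans these per-block indication registers and derives the
 * failing memory wrapper index as (bit position within the register) +
 * 32 * (register index), then selects that wrapper in order to read out the
 * captured ECC address and syndrome.
 */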
6563 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6564                 struct ecc_info_extract_params *params, u64 *ecc_address,
6565                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6566 {
6567         struct gaudi_device *gaudi = hdev->asic_specific;
6568         u32 i, num_mem_regs, reg, err_bit;
6569         u64 err_addr, err_word = 0;
6570         int rc = 0;
6571
6572         num_mem_regs = DIV_ROUND_UP(params->num_memories, 32);
6574
6575         if (params->block_address >= CFG_BASE)
6576                 params->block_address -= CFG_BASE;
6577
6578         if (params->derr)
6579                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6580         else
6581                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6582
6583         if (params->disable_clock_gating) {
6584                 mutex_lock(&gaudi->clk_gate_mutex);
6585                 hdev->asic_funcs->disable_clock_gating(hdev);
6586         }
6587
6588         /* Set invalid wrapper index */
6589         *memory_wrapper_idx = 0xFF;
6590
6591         /* Iterate through the memory wrappers; exactly one bit should be set */
6592         for (i = 0 ; i < num_mem_regs ; i++) {
6593                 err_word = RREG32(err_addr + i * 4);
6595                 if (err_word) {
6596                         err_bit = __ffs(err_word);
6597                         *memory_wrapper_idx = err_bit + (32 * i);
6598                         break;
6599                 }
6600         }
6601
6602         if (*memory_wrapper_idx == 0xFF) {
6603                 dev_err(hdev->dev, "ECC error information cannot be found\n");
6604                 rc = -EINVAL;
6605                 goto enable_clk_gate;
6606         }
6607
6608         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6609                         *memory_wrapper_idx);
6610
6611         *ecc_address =
6612                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6613         *ecc_syndrom =
6614                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6615
6616         /* Clear error indication */
6617         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6618         if (params->derr)
6619                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6620         else
6621                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6622
6623         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6624
6625 enable_clk_gate:
6626         if (params->disable_clock_gating) {
6627                 hdev->asic_funcs->set_clock_gating(hdev);
6628
6629                 mutex_unlock(&gaudi->clk_gate_mutex);
6630         }
6631
6632         return rc;
6633 }
6634
6635 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6636                                           const char *qm_name,
6637                                           u64 glbl_sts_addr,
6638                                           u64 arb_err_addr)
6639 {
6640         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6641         char reg_desc[32];
6642
6643         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
6644         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6645                 glbl_sts_clr_val = 0;
6646                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6647
6648                 if (!glbl_sts_val)
6649                         continue;
6650
6651                 if (i == QMAN_STREAMS)
6652                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
6653                 else
6654                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
6655
6656                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6657                         if (glbl_sts_val & BIT(j)) {
6658                                 dev_err_ratelimited(hdev->dev,
6659                                                 "%s %s. err cause: %s\n",
6660                                                 qm_name, reg_desc,
6661                                                 gaudi_qman_error_cause[j]);
6662                                 glbl_sts_clr_val |= BIT(j);
6663                         }
6664                 }
6665
6666                 /* Write 1 to clear the errors */
6667                 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6668         }
6669
6670         arb_err_val = RREG32(arb_err_addr);
6671
6672         if (!arb_err_val)
6673                 return;
6674
6675         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
6676                 if (arb_err_val & BIT(j)) {
6677                         dev_err_ratelimited(hdev->dev,
6678                                         "%s ARB_ERR. err cause: %s\n",
6679                                         qm_name,
6680                                         gaudi_qman_arb_error_cause[j]);
6681                 }
6682         }
6683 }
6684
6685 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
6686                 struct hl_eq_ecc_data *ecc_data)
6687 {
6688         struct ecc_info_extract_params params;
6689         u64 ecc_address = 0, ecc_syndrom = 0;
6690         u8 index, memory_wrapper_idx = 0;
6691         bool extract_info_from_fw;
6692         int rc;
6693
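        /*
         * For most ECC events the firmware reports the failing address,
         * syndrome and memory wrapper index in the EQ entry. For the TPC and
         * MME blocks the driver extracts that information itself from the
         * engine's configuration space, using the parameters set up below.
         */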
6694         switch (event_type) {
6695         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
6696         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
6697                 extract_info_from_fw = true;
6698                 break;
6699         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
6700                 index = event_type - GAUDI_EVENT_TPC0_SERR;
6701                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
6702                 params.num_memories = 90;
6703                 params.derr = false;
6704                 params.disable_clock_gating = true;
6705                 extract_info_from_fw = false;
6706                 break;
6707         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
6708                 index = event_type - GAUDI_EVENT_TPC0_DERR;
6709                 params.block_address =
6710                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
6711                 params.num_memories = 90;
6712                 params.derr = true;
6713                 params.disable_clock_gating = true;
6714                 extract_info_from_fw = false;
6715                 break;
6716         case GAUDI_EVENT_MME0_ACC_SERR:
6717         case GAUDI_EVENT_MME1_ACC_SERR:
6718         case GAUDI_EVENT_MME2_ACC_SERR:
6719         case GAUDI_EVENT_MME3_ACC_SERR:
6720                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
6721                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
6722                 params.num_memories = 128;
6723                 params.derr = false;
6724                 params.disable_clock_gating = true;
6725                 extract_info_from_fw = false;
6726                 break;
6727         case GAUDI_EVENT_MME0_ACC_DERR:
6728         case GAUDI_EVENT_MME1_ACC_DERR:
6729         case GAUDI_EVENT_MME2_ACC_DERR:
6730         case GAUDI_EVENT_MME3_ACC_DERR:
6731                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
6732                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
6733                 params.num_memories = 128;
6734                 params.derr = true;
6735                 params.disable_clock_gating = true;
6736                 extract_info_from_fw = false;
6737                 break;
6738         case GAUDI_EVENT_MME0_SBAB_SERR:
6739         case GAUDI_EVENT_MME1_SBAB_SERR:
6740         case GAUDI_EVENT_MME2_SBAB_SERR:
6741         case GAUDI_EVENT_MME3_SBAB_SERR:
6742                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
6743                 params.block_address =
6744                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
6745                 params.num_memories = 33;
6746                 params.derr = false;
6747                 params.disable_clock_gating = true;
6748                 extract_info_from_fw = false;
6749                 break;
6750         case GAUDI_EVENT_MME0_SBAB_DERR:
6751         case GAUDI_EVENT_MME1_SBAB_DERR:
6752         case GAUDI_EVENT_MME2_SBAB_DERR:
6753         case GAUDI_EVENT_MME3_SBAB_DERR:
6754                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
6755                 params.block_address =
6756                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
6757                 params.num_memories = 33;
6758                 params.derr = true;
6759                 params.disable_clock_gating = true;
6760                 extract_info_from_fw = false;
6761                 break;
6762         default:
6763                 return;
6764         }
6765
6766         if (extract_info_from_fw) {
6767                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
6768                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
6769                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
6770         } else {
6771                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
6772                                 &ecc_syndrom, &memory_wrapper_idx);
6773                 if (rc)
6774                         return;
6775         }
6776
6777         dev_err(hdev->dev,
6778                 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
6779                 ecc_address, ecc_syndrom, memory_wrapper_idx);
6780 }
6781
6782 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
6783 {
6784         u64 glbl_sts_addr, arb_err_addr;
6785         u8 index;
6786         char desc[32];
6787
6788         switch (event_type) {
6789         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
6790                 index = event_type - GAUDI_EVENT_TPC0_QM;
6791                 glbl_sts_addr =
6792                         mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
6793                 arb_err_addr =
6794                         mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
6795                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
6796                 break;
6797         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
6798                 index = event_type - GAUDI_EVENT_MME0_QM;
6799                 glbl_sts_addr =
6800                         mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
6801                 arb_err_addr =
6802                         mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
6803                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
6804                 break;
6805         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
6806                 index = event_type - GAUDI_EVENT_DMA0_QM;
6807                 glbl_sts_addr =
6808                         mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
6809                 arb_err_addr =
6810                         mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
6811                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
6812                 break;
6813         case GAUDI_EVENT_NIC0_QM0:
6814                 glbl_sts_addr = mmNIC0_QM0_GLBL_STS1_0;
6815                 arb_err_addr = mmNIC0_QM0_ARB_ERR_CAUSE;
6816                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
6817                 break;
6818         case GAUDI_EVENT_NIC0_QM1:
6819                 glbl_sts_addr = mmNIC0_QM1_GLBL_STS1_0;
6820                 arb_err_addr = mmNIC0_QM1_ARB_ERR_CAUSE;
6821                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
6822                 break;
6823         case GAUDI_EVENT_NIC1_QM0:
6824                 glbl_sts_addr = mmNIC1_QM0_GLBL_STS1_0;
6825                 arb_err_addr = mmNIC1_QM0_ARB_ERR_CAUSE;
6826                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
6827                 break;
6828         case GAUDI_EVENT_NIC1_QM1:
6829                 glbl_sts_addr = mmNIC1_QM1_GLBL_STS1_0;
6830                 arb_err_addr = mmNIC1_QM1_ARB_ERR_CAUSE;
6831                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
6832                 break;
6833         case GAUDI_EVENT_NIC2_QM0:
6834                 glbl_sts_addr = mmNIC2_QM0_GLBL_STS1_0;
6835                 arb_err_addr = mmNIC2_QM0_ARB_ERR_CAUSE;
6836                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
6837                 break;
6838         case GAUDI_EVENT_NIC2_QM1:
6839                 glbl_sts_addr = mmNIC2_QM1_GLBL_STS1_0;
6840                 arb_err_addr = mmNIC2_QM1_ARB_ERR_CAUSE;
6841                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
6842                 break;
6843         case GAUDI_EVENT_NIC3_QM0:
6844                 glbl_sts_addr = mmNIC3_QM0_GLBL_STS1_0;
6845                 arb_err_addr = mmNIC3_QM0_ARB_ERR_CAUSE;
6846                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
6847                 break;
6848         case GAUDI_EVENT_NIC3_QM1:
6849                 glbl_sts_addr = mmNIC3_QM1_GLBL_STS1_0;
6850                 arb_err_addr = mmNIC3_QM1_ARB_ERR_CAUSE;
6851                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
6852                 break;
6853         case GAUDI_EVENT_NIC4_QM0:
6854                 glbl_sts_addr = mmNIC4_QM0_GLBL_STS1_0;
6855                 arb_err_addr = mmNIC4_QM0_ARB_ERR_CAUSE;
6856                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
6857                 break;
6858         case GAUDI_EVENT_NIC4_QM1:
6859                 glbl_sts_addr = mmNIC4_QM1_GLBL_STS1_0;
6860                 arb_err_addr = mmNIC4_QM1_ARB_ERR_CAUSE;
6861                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
6862                 break;
6863         default:
6864                 return;
6865         }
6866
6867         gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
6868 }
6869
6870 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
6871                                         bool razwi)
6872 {
6873         char desc[64] = "";
6874
6875         gaudi_get_event_desc(event_type, desc, sizeof(desc));
6876         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
6877                 event_type, desc);
6878
6879         if (razwi) {
6880                 gaudi_print_razwi_info(hdev);
6881                 gaudi_print_mmu_error_info(hdev);
6882         }
6883 }
6884
6885 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
6886 {
6887         struct gaudi_device *gaudi = hdev->asic_specific;
6888
6889         /* Unmask all IRQs since some could have been received
6890          * during the soft reset
6891          */
6892         return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
6893 }
6894
6895 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
6896                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
6897 {
6898         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
6899         int err = 0;
6900
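        /*
         * When FW security is enabled the driver cannot access the HBM
         * registers directly, so the ECC information is decoded from the
         * packed word the firmware delivered in the event-queue entry.
         */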
6901         if (!hdev->asic_prop.fw_security_disabled) {
6902                 if (!hbm_ecc_data) {
6903                         dev_err(hdev->dev, "No FW ECC data");
6904                         return 0;
6905                 }
6906
6907                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
6908                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6909                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
6910                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6911                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
6912                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6913                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
6914                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6915                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
6916                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6917                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
6918                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6919                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
6920                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
6921
6922                 dev_err(hdev->dev,
6923                         "HBM%d pc%d ECC: TYPE=%d, WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
6924                         device, ch, type, wr_par, rd_par, ca_par, serr, derr);
6925
6926                 err = 1;
6927
6928                 return 0;
6929         }
6930
6931         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
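        /*
         * Each HBM channel occupies a 0x1000 register stride and covers two
         * pseudo-channels: offsets 0x060/0x064/0x06C are read below for the
         * even pseudo-channel (ECC info, first error address, interrupt bits)
         * and 0x070/0x074/0x07C for the odd one.
         */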
6932         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
6933                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
6934                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
6935                 if (val) {
6936                         err = 1;
6937                         dev_err(hdev->dev,
6938                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
6939                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
6940                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
6941                                 (val >> 4) & 0x1);
6942
6943                         val2 = RREG32(base + ch * 0x1000 + 0x060);
6944                         dev_err(hdev->dev,
6945                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
6946                                 device, ch * 2,
6947                                 RREG32(base + ch * 0x1000 + 0x064),
6948                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
6949                                 (val2 & 0xFF0000) >> 16,
6950                                 (val2 & 0xFF000000) >> 24);
6951                 }
6952
6953                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
6954                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
6955                 if (val) {
6956                         err = 1;
6957                         dev_err(hdev->dev,
6958                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
6959                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
6960                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
6961                                 (val >> 4) & 0x1);
6962
6963                         val2 = RREG32(base + ch * 0x1000 + 0x070);
6964                         dev_err(hdev->dev,
6965                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
6966                                 device, ch * 2 + 1,
6967                                 RREG32(base + ch * 0x1000 + 0x074),
6968                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
6969                                 (val2 & 0xFF0000) >> 16,
6970                                 (val2 & 0xFF000000) >> 24);
6971                 }
6972
6973                 /* Clear interrupts */
6974                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
6975                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
6976                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
6977                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
6978                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
6979                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
6980         }
6981
6982         val  = RREG32(base + 0x8F30);
6983         val2 = RREG32(base + 0x8F34);
6984         if (val | val2) {
6985                 err = 1;
6986                 dev_err(hdev->dev,
6987                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
6988                         device, val, val2);
6989         }
6990         val  = RREG32(base + 0x8F40);
6991         val2 = RREG32(base + 0x8F44);
6992         if (val | val2) {
6993                 err = 1;
6994                 dev_err(hdev->dev,
6995                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
6996                         device, val, val2);
6997         }
6998
6999         return err;
7000 }
7001
7002 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7003 {
7004         switch (hbm_event_type) {
7005         case GAUDI_EVENT_HBM0_SPI_0:
7006         case GAUDI_EVENT_HBM0_SPI_1:
7007                 return 0;
7008         case GAUDI_EVENT_HBM1_SPI_0:
7009         case GAUDI_EVENT_HBM1_SPI_1:
7010                 return 1;
7011         case GAUDI_EVENT_HBM2_SPI_0:
7012         case GAUDI_EVENT_HBM2_SPI_1:
7013                 return 2;
7014         case GAUDI_EVENT_HBM3_SPI_0:
7015         case GAUDI_EVENT_HBM3_SPI_1:
7016                 return 3;
7017         default:
7018                 break;
7019         }
7020
7021         /* Should never happen */
7022         return 0;
7023 }
7024
7025 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7026                                         char *interrupt_name)
7027 {
7028         struct gaudi_device *gaudi = hdev->asic_specific;
7029         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7030         bool soft_reset_required = false;
7031
7032         /* Accessing the TPC_INTR_CAUSE registers requires disabling clock
7033          * gating, which cannot be done by the CPU-CP firmware, so the driver
7034          * must do it instead.
7035          */
7036
7037         mutex_lock(&gaudi->clk_gate_mutex);
7038
7039         hdev->asic_funcs->disable_clock_gating(hdev);
7040
7041         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7042                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7043
7044         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7045                 if (tpc_interrupts_cause & BIT(i)) {
7046                         dev_err_ratelimited(hdev->dev,
7047                                         "TPC%d_%s interrupt cause: %s\n",
7048                                         tpc_id, interrupt_name,
7049                                         gaudi_tpc_interrupts_cause[i]);
7050                         /* If this is a QM error, we need to soft-reset */
7051                         if (i == 15)
7052                                 soft_reset_required = true;
7053                 }
7054
7055         /* Clear interrupts */
7056         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7057
7058         hdev->asic_funcs->set_clock_gating(hdev);
7059
7060         mutex_unlock(&gaudi->clk_gate_mutex);
7061
7062         return soft_reset_required;
7063 }
7064
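/*
 * The TPC event IDs are spaced so that each TPC owns two consecutive DEC event
 * types and six consecutive types in the KRN_ERR group, hence the divisors
 * used below to map an event type back to its TPC index.
 */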
7065 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7066 {
7067         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7068 }
7069
7070 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7071 {
7072         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7073 }
7074
7075 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7076                                         u16 event_type)
7077 {
7078         switch (event_type) {
7079         case GAUDI_EVENT_FIX_POWER_ENV_S:
7080                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7081                 dev_info_ratelimited(hdev->dev,
7082                         "Clock throttling due to power consumption\n");
7083                 break;
7084
7085         case GAUDI_EVENT_FIX_POWER_ENV_E:
7086                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7087                 dev_info_ratelimited(hdev->dev,
7088                         "Power envelop is safe, back to optimal clock\n");
7089                 break;
7090
7091         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7092                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7093                 dev_info_ratelimited(hdev->dev,
7094                         "Clock throttling due to overheating\n");
7095                 break;
7096
7097         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7098                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7099                 dev_info_ratelimited(hdev->dev,
7100                         "Thermal envelop is safe, back to optimal clock\n");
7101                 break;
7102
7103         default:
7104                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7105                         event_type);
7106                 break;
7107         }
7108 }
7109
7110 static void gaudi_handle_eqe(struct hl_device *hdev,
7111                                 struct hl_eq_entry *eq_entry)
7112 {
7113         struct gaudi_device *gaudi = hdev->asic_specific;
7114         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7115         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7116                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7117         u8 cause;
7118         bool reset_required;
7119
7120         gaudi->events_stat[event_type]++;
7121         gaudi->events_stat_aggregate[event_type]++;
7122
7123         switch (event_type) {
7124         case GAUDI_EVENT_PCIE_CORE_DERR:
7125         case GAUDI_EVENT_PCIE_IF_DERR:
7126         case GAUDI_EVENT_PCIE_PHY_DERR:
7127         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7128         case GAUDI_EVENT_MME0_ACC_DERR:
7129         case GAUDI_EVENT_MME0_SBAB_DERR:
7130         case GAUDI_EVENT_MME1_ACC_DERR:
7131         case GAUDI_EVENT_MME1_SBAB_DERR:
7132         case GAUDI_EVENT_MME2_ACC_DERR:
7133         case GAUDI_EVENT_MME2_SBAB_DERR:
7134         case GAUDI_EVENT_MME3_ACC_DERR:
7135         case GAUDI_EVENT_MME3_SBAB_DERR:
7136         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7137                 fallthrough;
7138         case GAUDI_EVENT_CPU_IF_ECC_DERR:
7139         case GAUDI_EVENT_PSOC_MEM_DERR:
7140         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7141         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7142         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7143         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7144         case GAUDI_EVENT_MMU_DERR:
7145                 gaudi_print_irq_info(hdev, event_type, true);
7146                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7147                 if (hdev->hard_reset_on_fw_events)
7148                         hl_device_reset(hdev, true, false);
7149                 break;
7150
7151         case GAUDI_EVENT_GIC500:
7152         case GAUDI_EVENT_AXI_ECC:
7153         case GAUDI_EVENT_L2_RAM_ECC:
7154         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7155                 gaudi_print_irq_info(hdev, event_type, false);
7156                 if (hdev->hard_reset_on_fw_events)
7157                         hl_device_reset(hdev, true, false);
7158                 break;
7159
7160         case GAUDI_EVENT_HBM0_SPI_0:
7161         case GAUDI_EVENT_HBM1_SPI_0:
7162         case GAUDI_EVENT_HBM2_SPI_0:
7163         case GAUDI_EVENT_HBM3_SPI_0:
7164                 gaudi_print_irq_info(hdev, event_type, false);
7165                 gaudi_hbm_read_interrupts(hdev,
7166                                 gaudi_hbm_event_to_dev(event_type),
7167                                 &eq_entry->hbm_ecc_data);
7168                 if (hdev->hard_reset_on_fw_events)
7169                         hl_device_reset(hdev, true, false);
7170                 break;
7171
7172         case GAUDI_EVENT_HBM0_SPI_1:
7173         case GAUDI_EVENT_HBM1_SPI_1:
7174         case GAUDI_EVENT_HBM2_SPI_1:
7175         case GAUDI_EVENT_HBM3_SPI_1:
7176                 gaudi_print_irq_info(hdev, event_type, false);
7177                 gaudi_hbm_read_interrupts(hdev,
7178                                 gaudi_hbm_event_to_dev(event_type),
7179                                 &eq_entry->hbm_ecc_data);
7180                 break;
7181
7182         case GAUDI_EVENT_TPC0_DEC:
7183         case GAUDI_EVENT_TPC1_DEC:
7184         case GAUDI_EVENT_TPC2_DEC:
7185         case GAUDI_EVENT_TPC3_DEC:
7186         case GAUDI_EVENT_TPC4_DEC:
7187         case GAUDI_EVENT_TPC5_DEC:
7188         case GAUDI_EVENT_TPC6_DEC:
7189         case GAUDI_EVENT_TPC7_DEC:
7190                 gaudi_print_irq_info(hdev, event_type, true);
7191                 reset_required = gaudi_tpc_read_interrupts(hdev,
7192                                         tpc_dec_event_to_tpc_id(event_type),
7193                                         "AXI_SLV_DEC_Error");
7194                 if (reset_required) {
7195                         dev_err(hdev->dev, "hard reset required due to %s\n",
7196                                 gaudi_irq_map_table[event_type].name);
7197
7198                         if (hdev->hard_reset_on_fw_events)
7199                                 hl_device_reset(hdev, true, false);
7200                 } else {
7201                         hl_fw_unmask_irq(hdev, event_type);
7202                 }
7203                 break;
7204
7205         case GAUDI_EVENT_TPC0_KRN_ERR:
7206         case GAUDI_EVENT_TPC1_KRN_ERR:
7207         case GAUDI_EVENT_TPC2_KRN_ERR:
7208         case GAUDI_EVENT_TPC3_KRN_ERR:
7209         case GAUDI_EVENT_TPC4_KRN_ERR:
7210         case GAUDI_EVENT_TPC5_KRN_ERR:
7211         case GAUDI_EVENT_TPC6_KRN_ERR:
7212         case GAUDI_EVENT_TPC7_KRN_ERR:
7213                 gaudi_print_irq_info(hdev, event_type, true);
7214                 reset_required = gaudi_tpc_read_interrupts(hdev,
7215                                         tpc_krn_event_to_tpc_id(event_type),
7216                                         "KRN_ERR");
7217                 if (reset_required) {
7218                         dev_err(hdev->dev, "hard reset required due to %s\n",
7219                                 gaudi_irq_map_table[event_type].name);
7220
7221                         if (hdev->hard_reset_on_fw_events)
7222                                 hl_device_reset(hdev, true, false);
7223                 } else {
7224                         hl_fw_unmask_irq(hdev, event_type);
7225                 }
7226                 break;
7227
7228         case GAUDI_EVENT_PCIE_CORE_SERR:
7229         case GAUDI_EVENT_PCIE_IF_SERR:
7230         case GAUDI_EVENT_PCIE_PHY_SERR:
7231         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7232         case GAUDI_EVENT_MME0_ACC_SERR:
7233         case GAUDI_EVENT_MME0_SBAB_SERR:
7234         case GAUDI_EVENT_MME1_ACC_SERR:
7235         case GAUDI_EVENT_MME1_SBAB_SERR:
7236         case GAUDI_EVENT_MME2_ACC_SERR:
7237         case GAUDI_EVENT_MME2_SBAB_SERR:
7238         case GAUDI_EVENT_MME3_ACC_SERR:
7239         case GAUDI_EVENT_MME3_SBAB_SERR:
7240         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7241         case GAUDI_EVENT_CPU_IF_ECC_SERR:
7242         case GAUDI_EVENT_PSOC_MEM_SERR:
7243         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7244         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7245         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7246         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7247                 fallthrough;
7248         case GAUDI_EVENT_MMU_SERR:
7249                 gaudi_print_irq_info(hdev, event_type, true);
7250                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7251                 hl_fw_unmask_irq(hdev, event_type);
7252                 break;
7253
7254         case GAUDI_EVENT_PCIE_DEC:
7255         case GAUDI_EVENT_MME0_WBC_RSP:
7256         case GAUDI_EVENT_MME0_SBAB0_RSP:
7257         case GAUDI_EVENT_MME1_WBC_RSP:
7258         case GAUDI_EVENT_MME1_SBAB0_RSP:
7259         case GAUDI_EVENT_MME2_WBC_RSP:
7260         case GAUDI_EVENT_MME2_SBAB0_RSP:
7261         case GAUDI_EVENT_MME3_WBC_RSP:
7262         case GAUDI_EVENT_MME3_SBAB0_RSP:
7263         case GAUDI_EVENT_CPU_AXI_SPLITTER:
7264         case GAUDI_EVENT_PSOC_AXI_DEC:
7265         case GAUDI_EVENT_PSOC_PRSTN_FALL:
7266         case GAUDI_EVENT_MMU_PAGE_FAULT:
7267         case GAUDI_EVENT_MMU_WR_PERM:
7268         case GAUDI_EVENT_RAZWI_OR_ADC:
7269         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7270         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7271         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7272                 fallthrough;
7273         case GAUDI_EVENT_NIC0_QM0:
7274         case GAUDI_EVENT_NIC0_QM1:
7275         case GAUDI_EVENT_NIC1_QM0:
7276         case GAUDI_EVENT_NIC1_QM1:
7277         case GAUDI_EVENT_NIC2_QM0:
7278         case GAUDI_EVENT_NIC2_QM1:
7279         case GAUDI_EVENT_NIC3_QM0:
7280         case GAUDI_EVENT_NIC3_QM1:
7281         case GAUDI_EVENT_NIC4_QM0:
7282         case GAUDI_EVENT_NIC4_QM1:
7283         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7284                 gaudi_print_irq_info(hdev, event_type, true);
7285                 gaudi_handle_qman_err(hdev, event_type);
7286                 hl_fw_unmask_irq(hdev, event_type);
7287                 break;
7288
7289         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7290                 gaudi_print_irq_info(hdev, event_type, true);
7291                 if (hdev->hard_reset_on_fw_events)
7292                         hl_device_reset(hdev, true, false);
7293                 break;
7294
7295         case GAUDI_EVENT_TPC0_BMON_SPMU:
7296         case GAUDI_EVENT_TPC1_BMON_SPMU:
7297         case GAUDI_EVENT_TPC2_BMON_SPMU:
7298         case GAUDI_EVENT_TPC3_BMON_SPMU:
7299         case GAUDI_EVENT_TPC4_BMON_SPMU:
7300         case GAUDI_EVENT_TPC5_BMON_SPMU:
7301         case GAUDI_EVENT_TPC6_BMON_SPMU:
7302         case GAUDI_EVENT_TPC7_BMON_SPMU:
7303         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7304                 gaudi_print_irq_info(hdev, event_type, false);
7305                 hl_fw_unmask_irq(hdev, event_type);
7306                 break;
7307
7308         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7309                 gaudi_print_clk_change_info(hdev, event_type);
7310                 hl_fw_unmask_irq(hdev, event_type);
7311                 break;
7312
7313         case GAUDI_EVENT_PSOC_GPIO_U16_0:
7314                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7315                 dev_err(hdev->dev,
7316                         "Received high temp H/W interrupt %d (cause %d)\n",
7317                         event_type, cause);
7318                 break;
7319
7320         default:
7321                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7322                                 event_type);
7323                 break;
7324         }
7325 }
7326
7327 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
7328                                         u32 *size)
7329 {
7330         struct gaudi_device *gaudi = hdev->asic_specific;
7331
7332         if (aggregate) {
7333                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
7334                 return gaudi->events_stat_aggregate;
7335         }
7336
7337         *size = (u32) sizeof(gaudi->events_stat);
7338         return gaudi->events_stat;
7339 }
7340
7341 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
7342                                         u32 flags)
7343 {
7344         struct gaudi_device *gaudi = hdev->asic_specific;
7345         u32 status, timeout_usec;
7346         int rc;
7347
7348         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7349                 hdev->hard_reset_pending)
7350                 return 0;
7351
7352         if (hdev->pldm)
7353                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7354         else
7355                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7356
7357         mutex_lock(&hdev->mmu_cache_lock);
7358
7359         /* L0 & L1 invalidation */
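        /*
         * Kick a full invalidation: advance the producer index and arm the
         * request, then poll below until mmSTLB_INV_PS reads back as zero,
         * which the driver treats as completion of the invalidation.
         */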
7360         WREG32(mmSTLB_INV_PS, 3);
7361         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7362         WREG32(mmSTLB_INV_PS, 2);
7363
7364         rc = hl_poll_timeout(
7365                 hdev,
7366                 mmSTLB_INV_PS,
7367                 status,
7368                 !status,
7369                 1000,
7370                 timeout_usec);
7371
7372         WREG32(mmSTLB_INV_SET, 0);
7373
7374         mutex_unlock(&hdev->mmu_cache_lock);
7375
7376         if (rc) {
7377                 dev_err_ratelimited(hdev->dev,
7378                                         "MMU cache invalidation timeout\n");
7379                 hl_device_reset(hdev, true, false);
7380         }
7381
7382         return rc;
7383 }
7384
7385 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7386                                 bool is_hard, u32 asid, u64 va, u64 size)
7387 {
7388         struct gaudi_device *gaudi = hdev->asic_specific;
7389         u32 status, timeout_usec;
7390         u32 inv_data;
7391         u32 pi;
7392         int rc;
7393
7394         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7395                 hdev->hard_reset_pending)
7396                 return 0;
7397
7398         mutex_lock(&hdev->mmu_cache_lock);
7399
7400         if (hdev->pldm)
7401                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7402         else
7403                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7404
7405         /*
7406          * TODO: currently invalidate entire L0 & L1 as in regular hard
7407          * invalidation. Need to apply invalidation of specific cache
7408          * lines with mask of ASID & VA & size.
7409          * Note that L1 will be flushed entirely in any case.
7410          */
7411
7412         /* L0 & L1 invalidation */
7413         inv_data = RREG32(mmSTLB_CACHE_INV);
7414         /* PI (producer index) is 8 bits wide */
7415         pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
7416         WREG32(mmSTLB_CACHE_INV,
7417                 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
7418
7419         rc = hl_poll_timeout(
7420                 hdev,
7421                 mmSTLB_INV_CONSUMER_INDEX,
7422                 status,
7423                 status == pi,
7424                 1000,
7425                 timeout_usec);
7426
7427         mutex_unlock(&hdev->mmu_cache_lock);
7428
7429         if (rc) {
7430                 dev_err_ratelimited(hdev->dev,
7431                                         "MMU cache invalidation timeout\n");
7432                 hl_device_reset(hdev, true, false);
7433         }
7434
7435         return rc;
7436 }
7437
7438 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
7439                                         u32 asid, u64 phys_addr)
7440 {
7441         u32 status, timeout_usec;
7442         int rc;
7443
7444         if (hdev->pldm)
7445                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7446         else
7447                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7448
7449         WREG32(MMU_ASID, asid);
7450         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7451         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
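        /*
         * Trigger the update by setting the busy bit; the poll below waits
         * for the MMU to clear it before the new hop0 address is considered
         * configured.
         */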
7452         WREG32(MMU_BUSY, 0x80000000);
7453
7454         rc = hl_poll_timeout(
7455                 hdev,
7456                 MMU_BUSY,
7457                 status,
7458                 !(status & 0x80000000),
7459                 1000,
7460                 timeout_usec);
7461
7462         if (rc) {
7463                 dev_err(hdev->dev,
7464                         "Timeout during MMU hop0 config of asid %d\n", asid);
7465                 return rc;
7466         }
7467
7468         return 0;
7469 }
7470
7471 static int gaudi_send_heartbeat(struct hl_device *hdev)
7472 {
7473         struct gaudi_device *gaudi = hdev->asic_specific;
7474
7475         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7476                 return 0;
7477
7478         return hl_fw_send_heartbeat(hdev);
7479 }
7480
7481 static int gaudi_cpucp_info_get(struct hl_device *hdev)
7482 {
7483         struct gaudi_device *gaudi = hdev->asic_specific;
7484         struct asic_fixed_properties *prop = &hdev->asic_prop;
7485         int rc;
7486
7487         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7488                 return 0;
7489
7490         rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0);
7491         if (rc)
7492                 return rc;
7493
7494         if (!strlen(prop->cpucp_info.card_name))
7495                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
7496                                 CARD_NAME_MAX_LEN);
7497
7498         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
7499
7500         if (hdev->card_type == cpucp_card_type_pci)
7501                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
7502         else if (hdev->card_type == cpucp_card_type_pmc)
7503                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
7504
7505         hdev->max_power = prop->max_power_default;
7506
7507         return 0;
7508 }
7509
7510 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
7511                                         struct seq_file *s)
7512 {
7513         struct gaudi_device *gaudi = hdev->asic_specific;
7514         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
7515         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
7516         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
7517         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
7518         bool is_idle = true, is_eng_idle, is_slave;
7519         u64 offset;
7520         int i, dma_id, port;
7521
7522         mutex_lock(&gaudi->clk_gate_mutex);
7523
7524         hdev->asic_funcs->disable_clock_gating(hdev);
7525
7526         if (s)
7527                 seq_puts(s,
7528                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
7529                         "---  -------  ------------  ----------  -------------\n");
7530
7531         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
7532                 dma_id = gaudi_dma_assignment[i];
7533                 offset = dma_id * DMA_QMAN_OFFSET;
7534
7535                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
7536                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
7537                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
7538                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
7539                                 IS_DMA_IDLE(dma_core_sts0);
7540                 is_idle &= is_eng_idle;
7541
7542                 if (mask)
7543                         *mask |= ((u64) !is_eng_idle) <<
7544                                         (GAUDI_ENGINE_ID_DMA_0 + dma_id);
7545                 if (s)
7546                         seq_printf(s, fmt, dma_id,
7547                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
7548                                 qm_cgm_sts, dma_core_sts0);
7549         }
7550
7551         if (s)
7552                 seq_puts(s,
7553                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
7554                         "---  -------  ------------  ----------  ----------\n");
7555
7556         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
7557                 offset = i * TPC_QMAN_OFFSET;
7558                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
7559                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
7560                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
7561                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
7562                                 IS_TPC_IDLE(tpc_cfg_sts);
7563                 is_idle &= is_eng_idle;
7564
7565                 if (mask)
7566                         *mask |= ((u64) !is_eng_idle) <<
7567                                                 (GAUDI_ENGINE_ID_TPC_0 + i);
7568                 if (s)
7569                         seq_printf(s, fmt, i,
7570                                 is_eng_idle ? "Y" : "N",
7571                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
7572         }
7573
7574         if (s)
7575                 seq_puts(s,
7576                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
7577                         "---  -------  ------------  ----------  -----------\n");
7578
7579         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
7580                 offset = i * MME_QMAN_OFFSET;
7581                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
7582                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
7583
7584                 /* MME 1 & 3 are slaves; no need to check their QMANs */
7585                 is_slave = i % 2;
7586                 if (!is_slave) {
7587                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
7588                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
7589                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
7590                 }
7591
7592                 is_idle &= is_eng_idle;
7593
7594                 if (mask)
7595                         *mask |= ((u64) !is_eng_idle) <<
7596                                                 (GAUDI_ENGINE_ID_MME_0 + i);
7597                 if (s) {
7598                         if (!is_slave)
7599                                 seq_printf(s, fmt, i,
7600                                         is_eng_idle ? "Y" : "N",
7601                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
7602                         else
7603                                 seq_printf(s, mme_slave_fmt, i,
7604                                         is_eng_idle ? "Y" : "N", "-",
7605                                         "-", mme_arch_sts);
7606                 }
7607         }
7608
7609         if (s)
7610                 seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
7611                                 "---  -------  ------------  ----------\n");
7612
7613         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
7614                 offset = i * NIC_MACRO_QMAN_OFFSET;
7615                 port = 2 * i;
7616                 if (hdev->nic_ports_mask & BIT(port)) {
7617                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
7618                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
7619                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
7620                         is_idle &= is_eng_idle;
7621
7622                         if (mask)
7623                                 *mask |= ((u64) !is_eng_idle) <<
7624                                                 (GAUDI_ENGINE_ID_NIC_0 + port);
7625                         if (s)
7626                                 seq_printf(s, nic_fmt, port,
7627                                                 is_eng_idle ? "Y" : "N",
7628                                                 qm_glbl_sts0, qm_cgm_sts);
7629                 }
7630
7631                 port = 2 * i + 1;
7632                 if (hdev->nic_ports_mask & BIT(port)) {
7633                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
7634                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
7635                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
7636                         is_idle &= is_eng_idle;
7637
7638                         if (mask)
7639                                 *mask |= ((u64) !is_eng_idle) <<
7640                                                 (GAUDI_ENGINE_ID_NIC_0 + port);
7641                         if (s)
7642                                 seq_printf(s, nic_fmt, port,
7643                                                 is_eng_idle ? "Y" : "N",
7644                                                 qm_glbl_sts0, qm_cgm_sts);
7645                 }
7646         }
7647
7648         if (s)
7649                 seq_puts(s, "\n");
7650
7651         hdev->asic_funcs->set_clock_gating(hdev);
7652
7653         mutex_unlock(&gaudi->clk_gate_mutex);
7654
7655         return is_idle;
7656 }
7657
7658 static void gaudi_hw_queues_lock(struct hl_device *hdev)
7659         __acquires(&gaudi->hw_queues_lock)
7660 {
7661         struct gaudi_device *gaudi = hdev->asic_specific;
7662
7663         spin_lock(&gaudi->hw_queues_lock);
7664 }
7665
7666 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
7667         __releases(&gaudi->hw_queues_lock)
7668 {
7669         struct gaudi_device *gaudi = hdev->asic_specific;
7670
7671         spin_unlock(&gaudi->hw_queues_lock);
7672 }
7673
7674 static u32 gaudi_get_pci_id(struct hl_device *hdev)
7675 {
7676         return hdev->pdev->device;
7677 }
7678
7679 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
7680                                 size_t max_size)
7681 {
7682         struct gaudi_device *gaudi = hdev->asic_specific;
7683
7684         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7685                 return 0;
7686
7687         return hl_fw_get_eeprom_data(hdev, data, max_size);
7688 }
7689
7690 /*
7691  * this function should be used only during initialization and/or after reset,
7692  * when there are no active users.
7693  */
7694 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
7695                                 u32 tpc_id)
7696 {
7697         struct gaudi_device *gaudi = hdev->asic_specific;
7698         u64 kernel_timeout;
7699         u32 status, offset;
7700         int rc;
7701
7702         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
7703
7704         if (hdev->pldm)
7705                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
7706         else
7707                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
7708
7709         mutex_lock(&gaudi->clk_gate_mutex);
7710
7711         hdev->asic_funcs->disable_clock_gating(hdev);
7712
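        /*
         * Load the kernel address into the TPC, trigger an icache invalidate
         * plus 64KB prefetch and wait for the vector pipe to drain, then issue
         * the execute command and wait for both the vector pipe and the
         * work-queue inflight counter to become idle.
         */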
7713         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
7714                         lower_32_bits(tpc_kernel));
7715         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
7716                         upper_32_bits(tpc_kernel));
7717
7718         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
7719                         lower_32_bits(tpc_kernel));
7720         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
7721                         upper_32_bits(tpc_kernel));
7722         /* Set a valid LUT pointer; its content is of no significance */
7723         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
7724                         lower_32_bits(tpc_kernel));
7725         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
7726                         upper_32_bits(tpc_kernel));
7727
7728         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
7729                         lower_32_bits(CFG_BASE +
7730                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
7731
7732         WREG32(mmTPC0_CFG_TPC_CMD + offset,
7733                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
7734                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
7735         /* wait a bit for the engine to start executing */
7736         usleep_range(1000, 1500);
7737
7738         /* wait until engine has finished executing */
7739         rc = hl_poll_timeout(
7740                 hdev,
7741                 mmTPC0_CFG_STATUS + offset,
7742                 status,
7743                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
7744                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
7745                 1000,
7746                 kernel_timeout);
7747
7748         if (rc) {
7749                 dev_err(hdev->dev,
7750                         "Timeout while waiting for TPC%d icache prefetch\n",
7751                         tpc_id);
7752                 hdev->asic_funcs->set_clock_gating(hdev);
7753                 mutex_unlock(&gaudi->clk_gate_mutex);
7754                 return -EIO;
7755         }
7756
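             /* icache is ready - trigger the actual kernel execution */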
7757         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
7758                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
7759
7760         /* wait a bit for the engine to start executing */
7761         usleep_range(1000, 1500);
7762
7763         /* wait until engine has finished executing */
7764         rc = hl_poll_timeout(
7765                 hdev,
7766                 mmTPC0_CFG_STATUS + offset,
7767                 status,
7768                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
7769                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
7770                 1000,
7771                 kernel_timeout);
7772
7773         if (rc) {
7774                 dev_err(hdev->dev,
7775                         "Timeout while waiting for TPC%d vector pipe\n",
7776                         tpc_id);
7777                 hdev->asic_funcs->set_clock_gating(hdev);
7778                 mutex_unlock(&gaudi->clk_gate_mutex);
7779                 return -EIO;
7780         }
7781
7782         rc = hl_poll_timeout(
7783                 hdev,
7784                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
7785                 status,
7786                 (status == 0),
7787                 1000,
7788                 kernel_timeout);
7789
7790         hdev->asic_funcs->set_clock_gating(hdev);
7791         mutex_unlock(&gaudi->clk_gate_mutex);
7792
7793         if (rc) {
7794                 dev_err(hdev->dev,
7795                         "Timeout while waiting for TPC%d kernel to execute\n",
7796                         tpc_id);
7797                 return -EIO;
7798         }
7799
7800         return 0;
7801 }
7802
7803 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
7804                 struct hl_ctx *ctx)
7805 {
7806         struct gaudi_device *gaudi = hdev->asic_specific;
7807         int min_alloc_order, rc, collective_cb_size;
7808
7809         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7810                 return 0;
7811
7812         hdev->internal_cb_pool_virt_addr =
7813                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
7814                                         HOST_SPACE_INTERNAL_CB_SZ,
7815                                         &hdev->internal_cb_pool_dma_addr,
7816                                         GFP_KERNEL | __GFP_ZERO);
7817
7818         if (!hdev->internal_cb_pool_virt_addr)
7819                 return -ENOMEM;
7820
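             /*
              * The pool's minimum allocation order is sized to a collective wait
              * CB: five MSG_SHORT packets plus one FENCE packet.
              */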
7821         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
7822                         sizeof(struct packet_fence);
7823         min_alloc_order = ilog2(collective_cb_size);
7824
7825         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
7826         if (!hdev->internal_cb_pool) {
7827                 dev_err(hdev->dev,
7828                         "Failed to create internal CB pool\n");
7829                 rc = -ENOMEM;
7830                 goto free_internal_cb_pool;
7831         }
7832
7833         rc = gen_pool_add(hdev->internal_cb_pool,
7834                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
7835                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
7836         if (rc) {
7837                 dev_err(hdev->dev,
7838                         "Failed to add memory to internal CB pool\n");
7839                 rc = -EFAULT;
7840                 goto destroy_internal_cb_pool;
7841         }
7842
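             /*
              * Reserve a block in the host VA range and map the pool's DMA
              * address into it, so the internal CBs can be fetched through the
              * device MMU.
              */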
7843         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
7844                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
7845                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
7846
7847         if (!hdev->internal_cb_va_base) {
                     rc = -ENOMEM;
7848                 goto destroy_internal_cb_pool;
             }
7849
7850         mutex_lock(&ctx->mmu_lock);
7851         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
7852                         hdev->internal_cb_pool_dma_addr,
7853                         HOST_SPACE_INTERNAL_CB_SZ);
7854
7855         hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
7856         mutex_unlock(&ctx->mmu_lock);
7857
7858         if (rc)
7859                 goto unreserve_internal_cb_pool;
7860
7861         return 0;
7862
7863 unreserve_internal_cb_pool:
7864         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
7865                         HOST_SPACE_INTERNAL_CB_SZ);
7866 destroy_internal_cb_pool:
7867         gen_pool_destroy(hdev->internal_cb_pool);
7868 free_internal_cb_pool:
7869         hdev->asic_funcs->asic_dma_free_coherent(hdev,
7870                         HOST_SPACE_INTERNAL_CB_SZ,
7871                         hdev->internal_cb_pool_virt_addr,
7872                         hdev->internal_cb_pool_dma_addr);
7873
7874         return rc;
7875 }
7876
7877 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
7878                 struct hl_ctx *ctx)
7879 {
7880         struct gaudi_device *gaudi = hdev->asic_specific;
7881
7882         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7883                 return;
7884
7885         mutex_lock(&ctx->mmu_lock);
7886         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
7887                         HOST_SPACE_INTERNAL_CB_SZ);
7888         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
7889                         HOST_SPACE_INTERNAL_CB_SZ);
7890         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
7891         mutex_unlock(&ctx->mmu_lock);
7892
7893         gen_pool_destroy(hdev->internal_cb_pool);
7894
7895         hdev->asic_funcs->asic_dma_free_coherent(hdev,
7896                         HOST_SPACE_INTERNAL_CB_SZ,
7897                         hdev->internal_cb_pool_virt_addr,
7898                         hdev->internal_cb_pool_dma_addr);
7899 }
7900
7901 static int gaudi_ctx_init(struct hl_ctx *ctx)
7902 {
7903         gaudi_mmu_prepare(ctx->hdev, ctx->asid);
7904         return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
7905 }
7906
7907 static void gaudi_ctx_fini(struct hl_ctx *ctx)
7908 {
7909         struct hl_device *hdev = ctx->hdev;
7910
7911         /* Gaudi will NEVER support more than a single compute context.
7912          * Therefore, don't clear anything unless it is the compute context
7913          */
7914         if (hdev->compute_ctx != ctx)
7915                 return;
7916
7917         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
7918 }
7919
7920 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
7921 {
7922         return gaudi_cq_assignment[cq_idx];
7923 }
7924
7925 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
7926 {
7927         return sizeof(struct packet_msg_short) +
7928                         sizeof(struct packet_msg_prot) * 2;
7929 }
7930
7931 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
7932 {
7933         return sizeof(struct packet_msg_short) * 4 +
7934                         sizeof(struct packet_fence) +
7935                         sizeof(struct packet_msg_prot) * 2;
7936 }
7937
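     /*
      * Append a single MSG_SHORT packet to the CB that increments the given sync
      * object by one. Returns the updated CB size.
      */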
7938 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
7939                                 u32 size)
7940 {
7941         struct hl_cb *cb = (struct hl_cb *) data;
7942         struct packet_msg_short *pkt;
7943         u32 value, ctl, pkt_size = sizeof(*pkt);
7944
7945         pkt = cb->kernel_address + size;
7946         memset(pkt, 0, pkt_size);
7947
7948         /* Inc by 1, Mode ADD */
7949         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
7950         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
7951
7952         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
7953         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
7954         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
7955         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
7956         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
7957         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
7958         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
7959
7960         pkt->value = cpu_to_le32(value);
7961         pkt->ctl = cpu_to_le32(ctl);
7962
7963         return size + pkt_size;
7964 }
7965
7966 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
7967                                         u16 addr)
7968 {
7969         u32 ctl, pkt_size = sizeof(*pkt);
7970
7971         memset(pkt, 0, pkt_size);
7972
7973         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
7974         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
7975         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
7976         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
7977         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
7978         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* MB is left for the last pkt */
7979
7980         pkt->value = cpu_to_le32(value);
7981         pkt->ctl = cpu_to_le32(ctl);
7982
7983         return pkt_size;
7984 }
7985
7986 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
7987                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
7988                 u16 sob_val, u16 mon_id)
7989 {
7990         u64 monitor_base;
7991         u32 ctl, value, pkt_size = sizeof(*pkt);
7992         u16 msg_addr_offset;
7993         u8 mask;
7994
7995         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
7996                 dev_err(hdev->dev,
7997                         "sob_base %u (mask %#x) is not valid\n",
7998                         sob_base, sob_mask);
7999                 return 0;
8000         }
8001
8002         /*
8003          * monitor_base should be the content of the base0 address registers,
8004          * so it will be added to the msg short offsets
8005          */
8006         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8007
8008         msg_addr_offset =
8009                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8010                                 monitor_base;
8011
8012         memset(pkt, 0, pkt_size);
8013
8014         /* Monitor config packet: bind the monitor to a sync object */
8015         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8016         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8017         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8018                         0); /* GREATER OR EQUAL */
8019         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8020
8021         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8022         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8023         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8024         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8025         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
8026         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
8027         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
8028
8029         pkt->value = cpu_to_le32(value);
8030         pkt->ctl = cpu_to_le32(ctl);
8031
8032         return pkt_size;
8033 }
8034
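     /*
      * Append a FENCE packet that waits on fence counter 2 (the CP_FENCE2_RDATA
      * registers used by gaudi_get_fence_addr() below): target value 1,
      * decremented by 1 once reached.
      */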
8035 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8036 {
8037         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8038
8039         memset(pkt, 0, pkt_size);
8040
8041         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8042         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8043         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8044
8045         ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
8046         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
8047         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
8048         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
8049
8050         pkt->cfg = cpu_to_le32(cfg);
8051         pkt->ctl = cpu_to_le32(ctl);
8052
8053         return pkt_size;
8054 }
8055
8056 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8057 {
8058         u32 offset, nic_index;
8059
8060         switch (queue_id) {
8061         case GAUDI_QUEUE_ID_DMA_0_0:
8062                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8063                 break;
8064         case GAUDI_QUEUE_ID_DMA_0_1:
8065                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8066                 break;
8067         case GAUDI_QUEUE_ID_DMA_0_2:
8068                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8069                 break;
8070         case GAUDI_QUEUE_ID_DMA_0_3:
8071                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8072                 break;
8073         case GAUDI_QUEUE_ID_DMA_1_0:
8074                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8075                 break;
8076         case GAUDI_QUEUE_ID_DMA_1_1:
8077                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8078                 break;
8079         case GAUDI_QUEUE_ID_DMA_1_2:
8080                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8081                 break;
8082         case GAUDI_QUEUE_ID_DMA_1_3:
8083                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8084                 break;
8085         case GAUDI_QUEUE_ID_DMA_5_0:
8086                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8087                 break;
8088         case GAUDI_QUEUE_ID_DMA_5_1:
8089                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8090                 break;
8091         case GAUDI_QUEUE_ID_DMA_5_2:
8092                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8093                 break;
8094         case GAUDI_QUEUE_ID_DMA_5_3:
8095                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8096                 break;
8097         case GAUDI_QUEUE_ID_TPC_7_0:
8098                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8099                 break;
8100         case GAUDI_QUEUE_ID_TPC_7_1:
8101                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8102                 break;
8103         case GAUDI_QUEUE_ID_TPC_7_2:
8104                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8105                 break;
8106         case GAUDI_QUEUE_ID_TPC_7_3:
8107                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8108                 break;
8109         case GAUDI_QUEUE_ID_NIC_0_0:
8110         case GAUDI_QUEUE_ID_NIC_1_0:
8111         case GAUDI_QUEUE_ID_NIC_2_0:
8112         case GAUDI_QUEUE_ID_NIC_3_0:
8113         case GAUDI_QUEUE_ID_NIC_4_0:
8114         case GAUDI_QUEUE_ID_NIC_5_0:
8115         case GAUDI_QUEUE_ID_NIC_6_0:
8116         case GAUDI_QUEUE_ID_NIC_7_0:
8117         case GAUDI_QUEUE_ID_NIC_8_0:
8118         case GAUDI_QUEUE_ID_NIC_9_0:
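                     /*
                      * Each NIC engine exposes four streams, so (queue_id - base) / 4
                      * yields the engine index; two engines share a NIC macro, hence
                      * the >> 1 / & 0x1 split in the offset calculation.
                      */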
8119                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8120                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8121                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8122                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8123                 break;
8124         case GAUDI_QUEUE_ID_NIC_0_1:
8125         case GAUDI_QUEUE_ID_NIC_1_1:
8126         case GAUDI_QUEUE_ID_NIC_2_1:
8127         case GAUDI_QUEUE_ID_NIC_3_1:
8128         case GAUDI_QUEUE_ID_NIC_4_1:
8129         case GAUDI_QUEUE_ID_NIC_5_1:
8130         case GAUDI_QUEUE_ID_NIC_6_1:
8131         case GAUDI_QUEUE_ID_NIC_7_1:
8132         case GAUDI_QUEUE_ID_NIC_8_1:
8133         case GAUDI_QUEUE_ID_NIC_9_1:
8134                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8135                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8136                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8137                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8138                 break;
8139         case GAUDI_QUEUE_ID_NIC_0_2:
8140         case GAUDI_QUEUE_ID_NIC_1_2:
8141         case GAUDI_QUEUE_ID_NIC_2_2:
8142         case GAUDI_QUEUE_ID_NIC_3_2:
8143         case GAUDI_QUEUE_ID_NIC_4_2:
8144         case GAUDI_QUEUE_ID_NIC_5_2:
8145         case GAUDI_QUEUE_ID_NIC_6_2:
8146         case GAUDI_QUEUE_ID_NIC_7_2:
8147         case GAUDI_QUEUE_ID_NIC_8_2:
8148         case GAUDI_QUEUE_ID_NIC_9_2:
8149                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8150                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8151                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8152                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8153                 break;
8154         case GAUDI_QUEUE_ID_NIC_0_3:
8155         case GAUDI_QUEUE_ID_NIC_1_3:
8156         case GAUDI_QUEUE_ID_NIC_2_3:
8157         case GAUDI_QUEUE_ID_NIC_3_3:
8158         case GAUDI_QUEUE_ID_NIC_4_3:
8159         case GAUDI_QUEUE_ID_NIC_5_3:
8160         case GAUDI_QUEUE_ID_NIC_6_3:
8161         case GAUDI_QUEUE_ID_NIC_7_3:
8162         case GAUDI_QUEUE_ID_NIC_8_3:
8163         case GAUDI_QUEUE_ID_NIC_9_3:
8164                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8165                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8166                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8167                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8168                 break;
8169         default:
8170                 return -EINVAL;
8171         }
8172
8173         *addr = CFG_BASE + offset;
8174
8175         return 0;
8176 }
8177
8178 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8179 {
8180         u64 monitor_base;
8181         u32 size = 0;
8182         u16 msg_addr_offset;
8183
8184         /*
8185          * monitor_base should be the content of the base0 address registers,
8186          * so it will be added to the msg short offsets
8187          */
8188         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8189
8190         /* First monitor config packet: low address of the sync */
8191         msg_addr_offset =
8192                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8193                                 monitor_base;
8194
8195         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8196                                         msg_addr_offset);
8197
8198         /* Second monitor config packet: high address of the sync */
8199         msg_addr_offset =
8200                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8201                                 monitor_base;
8202
8203         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8204                                         msg_addr_offset);
8205
8206         /*
8207          * Third monitor config packet: the payload, i.e. what to write when the
8208          * sync triggers
8209          */
8210         msg_addr_offset =
8211                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8212                                 monitor_base;
8213
8214         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8215
8216         return size;
8217 }
8218
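     /*
      * Build a wait CB: three MSG_SHORT packets configure the monitor payload
      * (fence address low/high and the data to write), a fourth arms the monitor
      * on the SOB group/value, and a final FENCE packet blocks the queue until
      * the monitor fires.
      */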
8219 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8220                                 struct hl_gen_wait_properties *prop)
8221 {
8222         struct hl_cb *cb = (struct hl_cb *) prop->data;
8223         void *buf = cb->kernel_address;
8224         u64 fence_addr = 0;
8225         u32 size = prop->size;
8226
8227         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8228                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8229                                 prop->q_idx);
8230                 return 0;
8231         }
8232
8233         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8234         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8235                         prop->sob_mask, prop->sob_val, prop->mon_id);
8236         size += gaudi_add_fence_pkt(buf + size);
8237
8238         return size;
8239 }
8240
8241 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8242 {
8243         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8244
8245         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8246                 hw_sob->sob_id);
8247
8248         WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
8249                 0);
8250
8251         kref_init(&hw_sob->kref);
8252 }
8253
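     /*
      * The F/W reports, via the PSOC non-reset scratch register, whether the
      * host is a POWER9 machine that supports full 64-bit DMA addressing;
      * otherwise fall back to a 48-bit DMA mask.
      */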
8254 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
8255 {
8256         if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
8257                                                         HL_POWER9_HOST_MAGIC) {
8258                 hdev->power9_64bit_dma_enable = 1;
8259                 hdev->dma_mask = 64;
8260         } else {
8261                 hdev->power9_64bit_dma_enable = 0;
8262                 hdev->dma_mask = 48;
8263         }
8264 }
8265
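     /* Read the 64-bit PSOC timestamp counter: upper 32 bits from CNTCVU, lower from CNTCVL */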
8266 static u64 gaudi_get_device_time(struct hl_device *hdev)
8267 {
8268         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8269
8270         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8271 }
8272
8273 static const struct hl_asic_funcs gaudi_funcs = {
8274         .early_init = gaudi_early_init,
8275         .early_fini = gaudi_early_fini,
8276         .late_init = gaudi_late_init,
8277         .late_fini = gaudi_late_fini,
8278         .sw_init = gaudi_sw_init,
8279         .sw_fini = gaudi_sw_fini,
8280         .hw_init = gaudi_hw_init,
8281         .hw_fini = gaudi_hw_fini,
8282         .halt_engines = gaudi_halt_engines,
8283         .suspend = gaudi_suspend,
8284         .resume = gaudi_resume,
8285         .cb_mmap = gaudi_cb_mmap,
8286         .ring_doorbell = gaudi_ring_doorbell,
8287         .pqe_write = gaudi_pqe_write,
8288         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
8289         .asic_dma_free_coherent = gaudi_dma_free_coherent,
8290         .scrub_device_mem = gaudi_scrub_device_mem,
8291         .get_int_queue_base = gaudi_get_int_queue_base,
8292         .test_queues = gaudi_test_queues,
8293         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
8294         .asic_dma_pool_free = gaudi_dma_pool_free,
8295         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
8296         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
8297         .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
8298         .cs_parser = gaudi_cs_parser,
8299         .asic_dma_map_sg = gaudi_dma_map_sg,
8300         .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
8301         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
8302         .update_eq_ci = gaudi_update_eq_ci,
8303         .context_switch = gaudi_context_switch,
8304         .restore_phase_topology = gaudi_restore_phase_topology,
8305         .debugfs_read32 = gaudi_debugfs_read32,
8306         .debugfs_write32 = gaudi_debugfs_write32,
8307         .debugfs_read64 = gaudi_debugfs_read64,
8308         .debugfs_write64 = gaudi_debugfs_write64,
8309         .add_device_attr = gaudi_add_device_attr,
8310         .handle_eqe = gaudi_handle_eqe,
8311         .set_pll_profile = gaudi_set_pll_profile,
8312         .get_events_stat = gaudi_get_events_stat,
8313         .read_pte = gaudi_read_pte,
8314         .write_pte = gaudi_write_pte,
8315         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
8316         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
8317         .send_heartbeat = gaudi_send_heartbeat,
8318         .set_clock_gating = gaudi_set_clock_gating,
8319         .disable_clock_gating = gaudi_disable_clock_gating,
8320         .debug_coresight = gaudi_debug_coresight,
8321         .is_device_idle = gaudi_is_device_idle,
8322         .soft_reset_late_init = gaudi_soft_reset_late_init,
8323         .hw_queues_lock = gaudi_hw_queues_lock,
8324         .hw_queues_unlock = gaudi_hw_queues_unlock,
8325         .get_pci_id = gaudi_get_pci_id,
8326         .get_eeprom_data = gaudi_get_eeprom_data,
8327         .send_cpu_message = gaudi_send_cpu_message,
8328         .pci_bars_map = gaudi_pci_bars_map,
8329         .init_iatu = gaudi_init_iatu,
8330         .rreg = hl_rreg,
8331         .wreg = hl_wreg,
8332         .halt_coresight = gaudi_halt_coresight,
8333         .ctx_init = gaudi_ctx_init,
8334         .ctx_fini = gaudi_ctx_fini,
8335         .get_clk_rate = gaudi_get_clk_rate,
8336         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
8337         .read_device_fw_version = gaudi_read_device_fw_version,
8338         .load_firmware_to_device = gaudi_load_firmware_to_device,
8339         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
8340         .get_signal_cb_size = gaudi_get_signal_cb_size,
8341         .get_wait_cb_size = gaudi_get_wait_cb_size,
8342         .gen_signal_cb = gaudi_gen_signal_cb,
8343         .gen_wait_cb = gaudi_gen_wait_cb,
8344         .reset_sob = gaudi_reset_sob,
8345         .reset_sob_group = gaudi_reset_sob_group,
8346         .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
8347         .get_device_time = gaudi_get_device_time,
8348         .collective_wait_init_cs = gaudi_collective_wait_init_cs,
8349         .collective_wait_create_jobs = gaudi_collective_wait_create_jobs
8350 };
8351
8352 /**
8353  * gaudi_set_asic_funcs - set GAUDI function pointers
8354  *
8355  * @hdev: pointer to hl_device structure
8356  *
8357  */
8358 void gaudi_set_asic_funcs(struct hl_device *hdev)
8359 {
8360         hdev->asic_funcs = &gaudi_funcs;
8361 }