1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2020 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
 39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
 42  *     - CP is secured: the driver needs to parse the CB, but WREG must be
 43  *                      allowed because of TDMA (tensor DMA). Hence, WREG is
 44  *                      never secured.
45  *
 46  * When the driver needs to use DMA, it checks that Gaudi is idle, sets DMA
 47  * channel 0 to secured, executes the DMA and changes it back to non-secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61
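/*
 * A minimal sketch of the secure-DMA flow described above (every helper
 * name below is hypothetical, for illustration only):
 *
 *	if (device_is_idle(hdev)) {
 *		set_dma_ch0_secured(hdev, true);
 *		run_dma_transfer(hdev, src, dst, size);
 *		set_dma_ch0_secured(hdev, false);
 *	}
 */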
62 #define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
65
66 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
67
68 #define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
69 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
70 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
71 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
72
73 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
74 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
75 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
76 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
77 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
78 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
79 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000         /* 1s */
80 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
81 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */
82
83 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
84
85 #define GAUDI_MAX_STRING_LEN            20
86
87 #define GAUDI_CB_POOL_CB_CNT            512
88 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
89
90 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
91
92 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
93
94 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
95
96 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
97
98 #define GAUDI_ARB_WDT_TIMEOUT           0x1000000
99
100 #define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
101                 BIT(GAUDI_ENGINE_ID_MME_0) |\
102                 BIT(GAUDI_ENGINE_ID_MME_2) |\
103                 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
104
105 #define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */
106
107 #define GAUDI_PLL_MAX 10
108
109 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
110                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
111                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
112                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
113                 "gaudi cpu eq"
114 };
115
116 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
117         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
118         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
119         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
120         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
121         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
122         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
123         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
124         [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
125 };
126
127 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
128         [0] = GAUDI_QUEUE_ID_DMA_0_0,
129         [1] = GAUDI_QUEUE_ID_DMA_0_1,
130         [2] = GAUDI_QUEUE_ID_DMA_0_2,
131         [3] = GAUDI_QUEUE_ID_DMA_0_3,
132         [4] = GAUDI_QUEUE_ID_DMA_1_0,
133         [5] = GAUDI_QUEUE_ID_DMA_1_1,
134         [6] = GAUDI_QUEUE_ID_DMA_1_2,
135         [7] = GAUDI_QUEUE_ID_DMA_1_3,
136 };
137
138 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
139         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
140         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
141         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
142         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
143         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
144         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
145         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
146         [PACKET_FENCE]          = sizeof(struct packet_fence),
147         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
148         [PACKET_NOP]            = sizeof(struct packet_nop),
149         [PACKET_STOP]           = sizeof(struct packet_stop),
150         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
151         [PACKET_WAIT]           = sizeof(struct packet_wait),
152         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
153 };
154
155 static inline bool validate_packet_id(enum packet_id id)
156 {
157         switch (id) {
158         case PACKET_WREG_32:
159         case PACKET_WREG_BULK:
160         case PACKET_MSG_LONG:
161         case PACKET_MSG_SHORT:
162         case PACKET_CP_DMA:
163         case PACKET_REPEAT:
164         case PACKET_MSG_PROT:
165         case PACKET_FENCE:
166         case PACKET_LIN_DMA:
167         case PACKET_NOP:
168         case PACKET_STOP:
169         case PACKET_ARB_POINT:
170         case PACKET_WAIT:
171         case PACKET_LOAD_AND_EXE:
172                 return true;
173         default:
174                 return false;
175         }
176 }
177
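/*
 * Illustrative use of the two tables above when walking a CB (a rough
 * sketch, not the driver's actual parser; 'user_pkt' is a hypothetical
 * pointer to the current packet):
 *
 *	u32 ctl = le32_to_cpu(user_pkt->ctl);
 *	enum packet_id pkt_id = FIELD_GET(GAUDI_PKT_CTL_OPCODE_MASK, ctl);
 *
 *	if (!validate_packet_id(pkt_id) || !gaudi_packet_sizes[pkt_id])
 *		return -EINVAL;
 *
 *	parsed_len += gaudi_packet_sizes[pkt_id];
 */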
178 static const char * const
179 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
180         "tpc_address_exceed_slm",
181         "tpc_div_by_0",
182         "tpc_spu_mac_overflow",
183         "tpc_spu_addsub_overflow",
184         "tpc_spu_abs_overflow",
185         "tpc_spu_fp_dst_nan_inf",
186         "tpc_spu_fp_dst_denorm",
187         "tpc_vpu_mac_overflow",
188         "tpc_vpu_addsub_overflow",
189         "tpc_vpu_abs_overflow",
190         "tpc_vpu_fp_dst_nan_inf",
191         "tpc_vpu_fp_dst_denorm",
192         "tpc_assertions",
193         "tpc_illegal_instruction",
194         "tpc_pc_wrap_around",
195         "tpc_qm_sw_err",
196         "tpc_hbw_rresp_err",
197         "tpc_hbw_bresp_err",
198         "tpc_lbw_rresp_err",
199         "tpc_lbw_bresp_err"
200 };
201
202 static const char * const
203 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
204         "PQ AXI HBW error",
205         "CQ AXI HBW error",
206         "CP AXI HBW error",
207         "CP error due to undefined OPCODE",
208         "CP encountered STOP OPCODE",
209         "CP AXI LBW error",
210         "CP WRREG32 or WRBULK returned error",
211         "N/A",
212         "FENCE 0 inc over max value and clipped",
213         "FENCE 1 inc over max value and clipped",
214         "FENCE 2 inc over max value and clipped",
215         "FENCE 3 inc over max value and clipped",
216         "FENCE 0 dec under min value and clipped",
217         "FENCE 1 dec under min value and clipped",
218         "FENCE 2 dec under min value and clipped",
219         "FENCE 3 dec under min value and clipped"
220 };
221
222 static const char * const
223 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
224         "Choice push while full error",
225         "Choice Q watchdog error",
226         "MSG AXI LBW returned with error"
227 };
228
229 enum gaudi_sm_sei_cause {
230         GAUDI_SM_SEI_SO_OVERFLOW,
231         GAUDI_SM_SEI_LBW_4B_UNALIGNED,
232         GAUDI_SM_SEI_AXI_RESPONSE_ERR
233 };
234
235 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
236         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
237         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
238         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
239         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
240         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
241         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
242         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
243         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
244         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
245         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
246         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
247         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
248         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
249         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
250         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
251         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
252         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
253         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
254         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
255         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
256         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
257         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
258         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
259         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
304         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
305         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
306         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
307         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
308         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
309         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
310         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
311         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
312         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
313         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
314         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
315         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
316         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
317         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
318         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
319         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
320         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
321         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
322         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
323         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
324         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
325         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
326         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
327         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
328         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
329         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
330         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
331         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
332         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
333         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
334         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
335         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
336         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
337         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
338         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
339         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
340         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
341         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
342         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
343         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
344         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
345         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
346         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
347         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
348         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
349 };
350
351 struct ecc_info_extract_params {
352         u64 block_address;
353         u32 num_memories;
354         bool derr;
355         bool disable_clock_gating;
356 };
357
358 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
359                                                                 u64 phys_addr);
360 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
361                                         struct hl_cs_job *job);
362 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
363                                         u32 size, u64 val);
364 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
365                                         u32 num_regs, u32 val);
366 static int gaudi_schedule_register_memset(struct hl_device *hdev,
367                 u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
368 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
369                                 u32 tpc_id);
370 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
371 static int gaudi_cpucp_info_get(struct hl_device *hdev);
372 static void gaudi_disable_clock_gating(struct hl_device *hdev);
373 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
374 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
375                                 u32 size, bool eb);
376 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
377                                 struct hl_gen_wait_properties *prop);
378
379 static inline enum hl_collective_mode
380 get_collective_mode(struct hl_device *hdev, u32 queue_id)
381 {
382         if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
383                 return HL_COLLECTIVE_MASTER;
384
385         if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
386                         queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
387                 return HL_COLLECTIVE_SLAVE;
388
389         if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
390                         queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
391                 return HL_COLLECTIVE_SLAVE;
392
393         if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
394                         queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
395                 return HL_COLLECTIVE_SLAVE;
396
397         return HL_COLLECTIVE_NOT_SUPPORTED;
398 }
399
400 static inline void set_default_power_values(struct hl_device *hdev)
401 {
402         struct asic_fixed_properties *prop = &hdev->asic_prop;
403
404         if (hdev->card_type == cpucp_card_type_pmc) {
405                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
406                 prop->dc_power_default = DC_POWER_DEFAULT_PMC;
407         } else {
408                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
409                 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
410         }
411 }
412
413 static int gaudi_set_fixed_properties(struct hl_device *hdev)
414 {
415         struct asic_fixed_properties *prop = &hdev->asic_prop;
416         u32 num_sync_stream_queues = 0;
417         int i;
418
419         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
420         prop->hw_queues_props = kcalloc(prop->max_queues,
421                         sizeof(struct hw_queue_properties),
422                         GFP_KERNEL);
423
424         if (!prop->hw_queues_props)
425                 return -ENOMEM;
426
427         for (i = 0 ; i < prop->max_queues ; i++) {
428                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
429                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
430                         prop->hw_queues_props[i].driver_only = 0;
431                         prop->hw_queues_props[i].supports_sync_stream = 1;
432                         prop->hw_queues_props[i].cb_alloc_flags =
433                                 CB_ALLOC_KERNEL;
434                         num_sync_stream_queues++;
435                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
436                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
437                         prop->hw_queues_props[i].driver_only = 1;
438                         prop->hw_queues_props[i].supports_sync_stream = 0;
439                         prop->hw_queues_props[i].cb_alloc_flags =
440                                 CB_ALLOC_KERNEL;
441                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
442                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
443                         prop->hw_queues_props[i].driver_only = 0;
444                         prop->hw_queues_props[i].supports_sync_stream = 0;
445                         prop->hw_queues_props[i].cb_alloc_flags =
446                                 CB_ALLOC_USER;
447
448                 }
449                 prop->hw_queues_props[i].collective_mode =
450                                                 get_collective_mode(hdev, i);
451         }
452
453         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
454         prop->collective_first_sob = 0;
455         prop->collective_first_mon = 0;
456
457         /* 2 SOBs per internal queue stream are reserved for collective */
458         prop->sync_stream_first_sob =
459                         ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
460                         * QMAN_STREAMS * HL_RSVD_SOBS;
461
 462         /* 1 monitor per internal queue stream is reserved for collective.
 463          * 2 monitors per external queue stream are reserved for collective.
464          */
465         prop->sync_stream_first_mon =
466                         (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
467                         (NUMBER_OF_EXT_HW_QUEUES * 2);
468
469         prop->dram_base_address = DRAM_PHYS_BASE;
470         prop->dram_size = GAUDI_HBM_SIZE_32GB;
471         prop->dram_end_address = prop->dram_base_address +
472                                         prop->dram_size;
473         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
474
475         prop->sram_base_address = SRAM_BASE_ADDR;
476         prop->sram_size = SRAM_SIZE;
477         prop->sram_end_address = prop->sram_base_address +
478                                         prop->sram_size;
479         prop->sram_user_base_address = prop->sram_base_address +
480                                         SRAM_USER_BASE_OFFSET;
481
482         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
483         if (hdev->pldm)
484                 prop->mmu_pgt_size = 0x800000; /* 8MB */
485         else
486                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
487         prop->mmu_pte_size = HL_PTE_SIZE;
488         prop->mmu_hop_table_size = HOP_TABLE_SIZE;
489         prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
490         prop->dram_page_size = PAGE_SIZE_2MB;
491         prop->dram_supports_virtual_memory = false;
492
493         prop->pmmu.hop0_shift = HOP0_SHIFT;
494         prop->pmmu.hop1_shift = HOP1_SHIFT;
495         prop->pmmu.hop2_shift = HOP2_SHIFT;
496         prop->pmmu.hop3_shift = HOP3_SHIFT;
497         prop->pmmu.hop4_shift = HOP4_SHIFT;
498         prop->pmmu.hop0_mask = HOP0_MASK;
499         prop->pmmu.hop1_mask = HOP1_MASK;
500         prop->pmmu.hop2_mask = HOP2_MASK;
501         prop->pmmu.hop3_mask = HOP3_MASK;
502         prop->pmmu.hop4_mask = HOP4_MASK;
503         prop->pmmu.start_addr = VA_HOST_SPACE_START;
504         prop->pmmu.end_addr =
505                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
506         prop->pmmu.page_size = PAGE_SIZE_4KB;
507         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
508
 509         /* PMMU and HPMMU are the same except for the page size */
510         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
511         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
512
513         /* shifts and masks are the same in PMMU and DMMU */
514         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
515         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
516         prop->dmmu.end_addr = VA_HOST_SPACE_END;
517         prop->dmmu.page_size = PAGE_SIZE_2MB;
518
519         prop->cfg_size = CFG_SIZE;
520         prop->max_asid = MAX_ASID;
521         prop->num_of_events = GAUDI_EVENT_SIZE;
522         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
523
524         set_default_power_values(hdev);
525
526         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
527         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
528
529         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
530         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
531
532         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
533                                         CARD_NAME_MAX_LEN);
534
535         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
536
537         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
538                         prop->sync_stream_first_sob +
539                         (num_sync_stream_queues * HL_RSVD_SOBS);
540         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
541                         prop->sync_stream_first_mon +
542                         (num_sync_stream_queues * HL_RSVD_MONS);
543
544         prop->first_available_user_msix_interrupt = USHRT_MAX;
545
546         for (i = 0 ; i < HL_MAX_DCORES ; i++)
547                 prop->first_available_cq[i] = USHRT_MAX;
548
549         prop->fw_cpu_boot_dev_sts0_valid = false;
550         prop->fw_cpu_boot_dev_sts1_valid = false;
551         prop->hard_reset_done_by_fw = false;
552         prop->gic_interrupts_enable = true;
553
554         return 0;
555 }
556
557 static int gaudi_pci_bars_map(struct hl_device *hdev)
558 {
559         static const char * const name[] = {"SRAM", "CFG", "HBM"};
560         bool is_wc[3] = {false, false, true};
561         int rc;
562
563         rc = hl_pci_bars_map(hdev, name, is_wc);
564         if (rc)
565                 return rc;
566
567         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
568                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
569
570         return 0;
571 }
572
573 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
574 {
575         struct gaudi_device *gaudi = hdev->asic_specific;
576         struct hl_inbound_pci_region pci_region;
577         u64 old_addr = addr;
578         int rc;
579
580         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
581                 return old_addr;
582
583         if (hdev->asic_prop.iatu_done_by_fw)
584                 return U64_MAX;
585
586         /* Inbound Region 2 - Bar 4 - Point to HBM */
587         pci_region.mode = PCI_BAR_MATCH_MODE;
588         pci_region.bar = HBM_BAR_ID;
589         pci_region.addr = addr;
590         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
591         if (rc)
592                 return U64_MAX;
593
594         if (gaudi) {
595                 old_addr = gaudi->hbm_bar_cur_addr;
596                 gaudi->hbm_bar_cur_addr = addr;
597         }
598
599         return old_addr;
600 }
601
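/*
 * Typical usage pattern for the BAR-sliding helper above (an illustrative
 * sketch; the alignment math is simplified):
 *
 *	u64 bar_base = addr & ~(prop->dram_pci_bar_size - 1);
 *	u64 old_base = gaudi_set_hbm_bar_base(hdev, bar_base);
 *
 *	val = readl(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base));
 *
 *	if (old_base != U64_MAX)
 *		gaudi_set_hbm_bar_base(hdev, old_base);
 */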
602 static int gaudi_init_iatu(struct hl_device *hdev)
603 {
604         struct hl_inbound_pci_region inbound_region;
605         struct hl_outbound_pci_region outbound_region;
606         int rc;
607
608         if (hdev->asic_prop.iatu_done_by_fw)
609                 return 0;
610
611         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
612         inbound_region.mode = PCI_BAR_MATCH_MODE;
613         inbound_region.bar = SRAM_BAR_ID;
614         inbound_region.addr = SRAM_BASE_ADDR;
615         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
616         if (rc)
617                 goto done;
618
619         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
620         inbound_region.mode = PCI_BAR_MATCH_MODE;
621         inbound_region.bar = CFG_BAR_ID;
622         inbound_region.addr = SPI_FLASH_BASE_ADDR;
623         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
624         if (rc)
625                 goto done;
626
627         /* Inbound Region 2 - Bar 4 - Point to HBM */
628         inbound_region.mode = PCI_BAR_MATCH_MODE;
629         inbound_region.bar = HBM_BAR_ID;
630         inbound_region.addr = DRAM_PHYS_BASE;
631         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
632         if (rc)
633                 goto done;
634
635         hdev->asic_funcs->set_dma_mask_from_fw(hdev);
636
637         /* Outbound Region 0 - Point to Host */
638         outbound_region.addr = HOST_PHYS_BASE;
639         outbound_region.size = HOST_PHYS_SIZE;
640         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
641
642 done:
643         return rc;
644 }
645
646 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
647 {
648         return RREG32(mmHW_STATE);
649 }
650
651 static int gaudi_early_init(struct hl_device *hdev)
652 {
653         struct asic_fixed_properties *prop = &hdev->asic_prop;
654         struct pci_dev *pdev = hdev->pdev;
655         u32 fw_boot_status;
656         int rc;
657
658         rc = gaudi_set_fixed_properties(hdev);
659         if (rc) {
660                 dev_err(hdev->dev, "Failed setting fixed properties\n");
661                 return rc;
662         }
663
664         /* Check BAR sizes */
665         if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
666                 dev_err(hdev->dev,
667                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
668                         SRAM_BAR_ID,
669                         (unsigned long long) pci_resource_len(pdev,
670                                                         SRAM_BAR_ID),
671                         SRAM_BAR_SIZE);
672                 rc = -ENODEV;
673                 goto free_queue_props;
674         }
675
676         if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
677                 dev_err(hdev->dev,
678                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
679                         CFG_BAR_ID,
680                         (unsigned long long) pci_resource_len(pdev,
681                                                                 CFG_BAR_ID),
682                         CFG_BAR_SIZE);
683                 rc = -ENODEV;
684                 goto free_queue_props;
685         }
686
687         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
688
 689         /* If FW security is enabled at this point, there is no access to the ELBI */
690         if (hdev->asic_prop.fw_security_enabled) {
691                 hdev->asic_prop.iatu_done_by_fw = true;
692
693                 /*
 694                  * The GIC security bit can ONLY be set by CPUCP, so at this
 695                  * stage the decision can only be taken based on PCI ID security.
696                  */
697                 hdev->asic_prop.gic_interrupts_enable = false;
698                 goto pci_init;
699         }
700
701         rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
702                                 &fw_boot_status);
703         if (rc)
704                 goto free_queue_props;
705
706         /* Check whether FW is configuring iATU */
707         if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
708                         (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
709                 hdev->asic_prop.iatu_done_by_fw = true;
710
711 pci_init:
712         rc = hl_pci_init(hdev);
713         if (rc)
714                 goto free_queue_props;
715
 716         /* Before continuing with the initialization, we need to read the preboot
 717          * version to determine whether we are running security-enabled firmware.
718          */
719         rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
720                                         mmCPU_BOOT_DEV_STS0,
721                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
722                                         mmCPU_BOOT_ERR1,
723                                         GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
724         if (rc) {
725                 if (hdev->reset_on_preboot_fail)
726                         hdev->asic_funcs->hw_fini(hdev, true);
727                 goto pci_fini;
728         }
729
730         if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
731                 dev_info(hdev->dev,
732                         "H/W state is dirty, must reset before initializing\n");
733                 hdev->asic_funcs->hw_fini(hdev, true);
734         }
735
736         return 0;
737
738 pci_fini:
739         hl_pci_fini(hdev);
740 free_queue_props:
741         kfree(hdev->asic_prop.hw_queues_props);
742         return rc;
743 }
744
745 static int gaudi_early_fini(struct hl_device *hdev)
746 {
747         kfree(hdev->asic_prop.hw_queues_props);
748         hl_pci_fini(hdev);
749
750         return 0;
751 }
752
753 /**
754  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
755  *
756  * @hdev: pointer to hl_device structure
757  *
758  */
759 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
760 {
761         struct asic_fixed_properties *prop = &hdev->asic_prop;
762         u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
763         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
764         int rc;
765
766         if (hdev->asic_prop.fw_security_enabled) {
767                 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
768
769                 if (rc)
770                         return rc;
771
772                 freq = pll_freq_arr[2];
773         } else {
774                 /* Backward compatibility */
775                 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
776                 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
777                 nr = RREG32(mmPSOC_CPU_PLL_NR);
778                 nf = RREG32(mmPSOC_CPU_PLL_NF);
779                 od = RREG32(mmPSOC_CPU_PLL_OD);
780
781                 if (div_sel == DIV_SEL_REF_CLK ||
782                                 div_sel == DIV_SEL_DIVIDED_REF) {
783                         if (div_sel == DIV_SEL_REF_CLK)
784                                 freq = PLL_REF_CLK;
785                         else
786                                 freq = PLL_REF_CLK / (div_fctr + 1);
787                 } else if (div_sel == DIV_SEL_PLL_CLK ||
788                         div_sel == DIV_SEL_DIVIDED_PLL) {
789                         pll_clk = PLL_REF_CLK * (nf + 1) /
790                                         ((nr + 1) * (od + 1));
791                         if (div_sel == DIV_SEL_PLL_CLK)
792                                 freq = pll_clk;
793                         else
794                                 freq = pll_clk / (div_fctr + 1);
795                 } else {
796                         dev_warn(hdev->dev,
797                                 "Received invalid div select value: %d",
798                                 div_sel);
799                         freq = 0;
800                 }
801         }
802
803         prop->psoc_timestamp_frequency = freq;
804         prop->psoc_pci_pll_nr = nr;
805         prop->psoc_pci_pll_nf = nf;
806         prop->psoc_pci_pll_od = od;
807         prop->psoc_pci_pll_div_factor = div_fctr;
808
809         return 0;
810 }
811
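/*
 * Worked example of the PLL formula above, using purely hypothetical
 * register values and a 50 MHz reference clock (the real PLL_REF_CLK and
 * divider values come from the platform): with nf = 99, nr = 0, od = 1
 * and div_fctr = 4:
 *
 *	pll_clk = 50 * (99 + 1) / ((0 + 1) * (1 + 1)) = 2500 MHz
 *	freq    = pll_clk / (4 + 1)                   =  500 MHz
 */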
812 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
813                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
814 {
815         struct asic_fixed_properties *prop = &hdev->asic_prop;
816         struct packet_lin_dma *init_tpc_mem_pkt;
817         struct hl_cs_job *job;
818         struct hl_cb *cb;
819         u64 dst_addr;
820         u32 cb_size, ctl;
821         u8 tpc_id;
822         int rc;
823
824         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
825         if (!cb)
826                 return -EFAULT;
827
828         init_tpc_mem_pkt = cb->kernel_address;
829         cb_size = sizeof(*init_tpc_mem_pkt);
830         memset(init_tpc_mem_pkt, 0, cb_size);
831
832         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
833
834         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
835         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
836         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
837         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
838
839         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
840
841         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
842         dst_addr = (prop->sram_user_base_address &
843                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
844                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
845         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
846
847         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
848         if (!job) {
849                 dev_err(hdev->dev, "Failed to allocate a new job\n");
850                 rc = -ENOMEM;
851                 goto release_cb;
852         }
853
854         job->id = 0;
855         job->user_cb = cb;
856         atomic_inc(&job->user_cb->cs_cnt);
857         job->user_cb_size = cb_size;
858         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
859         job->patched_cb = job->user_cb;
860         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
861
862         hl_debugfs_add_job(hdev, job);
863
864         rc = gaudi_send_job_on_qman0(hdev, job);
865
866         if (rc)
867                 goto free_job;
868
869         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
870                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
871                 if (rc)
872                         break;
873         }
874
875 free_job:
876         hl_userptr_delete_list(hdev, &job->userptr_list);
877         hl_debugfs_remove_job(hdev, job);
878         kfree(job);
879         atomic_dec(&cb->cs_cnt);
880
881 release_cb:
882         hl_cb_put(cb);
883         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
884
885         return rc;
886 }
887
888 /*
889  * gaudi_init_tpc_mem() - Initialize TPC memories.
890  * @hdev: Pointer to hl_device structure.
891  *
892  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
893  *
894  * Return: 0 for success, negative value for error.
895  */
896 static int gaudi_init_tpc_mem(struct hl_device *hdev)
897 {
898         const struct firmware *fw;
899         size_t fw_size;
900         void *cpu_addr;
901         dma_addr_t dma_handle;
902         int rc, count = 5;
903
904 again:
905         rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
906         if (rc == -EINTR && count-- > 0) {
907                 msleep(50);
908                 goto again;
909         }
910
911         if (rc) {
912                 dev_err(hdev->dev, "Failed to load firmware file %s\n",
913                                 GAUDI_TPC_FW_FILE);
914                 goto out;
915         }
916
917         fw_size = fw->size;
918         cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
919                         &dma_handle, GFP_KERNEL | __GFP_ZERO);
920         if (!cpu_addr) {
921                 dev_err(hdev->dev,
922                         "Failed to allocate %zu of dma memory for TPC kernel\n",
923                         fw_size);
924                 rc = -ENOMEM;
925                 goto out;
926         }
927
928         memcpy(cpu_addr, fw->data, fw_size);
929
930         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
931
932         hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
933                         dma_handle);
934
935 out:
936         release_firmware(fw);
937         return rc;
938 }
939
940 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
941 {
942         struct gaudi_device *gaudi = hdev->asic_specific;
943         struct gaudi_collective_properties *prop = &gaudi->collective_props;
944         struct hl_hw_queue *q;
945         u32 i, sob_id, sob_group_id, queue_id;
946
947         /* Iterate through SOB groups and assign a SOB for each slave queue */
948         sob_group_id =
949                 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
950         sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
951
952         queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
953         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
954                 q = &hdev->kernel_queues[queue_id + (4 * i)];
955                 q->sync_stream_prop.collective_sob_id = sob_id + i;
956         }
957
958         /* Both DMA5 and TPC7 use the same resources since only a single
 959          * engine needs to participate in the reduction process
960          */
961         queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
962         q = &hdev->kernel_queues[queue_id];
963         q->sync_stream_prop.collective_sob_id =
964                         sob_id + NIC_NUMBER_OF_ENGINES;
965
966         queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
967         q = &hdev->kernel_queues[queue_id];
968         q->sync_stream_prop.collective_sob_id =
969                         sob_id + NIC_NUMBER_OF_ENGINES;
970 }
971
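/*
 * Example of the mapping done above (illustrative, assuming
 * NIC_NUMBER_OF_ENGINES == 10 and a SOB group whose base_sob_id is 0):
 * the NIC 0-9 queues of the stream get SOBs 0-9, while the DMA5 and TPC7
 * queues of the same stream both get SOB 10, since only one of them acts
 * as the reduction engine at a time.
 */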
972 static void gaudi_sob_group_hw_reset(struct kref *ref)
973 {
974         struct gaudi_hw_sob_group *hw_sob_group =
975                 container_of(ref, struct gaudi_hw_sob_group, kref);
976         struct hl_device *hdev = hw_sob_group->hdev;
977         u64 base_addr;
978         int rc;
979
980         base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
981                         hw_sob_group->base_sob_id * 4;
982         rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
983                         base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
984         if (rc)
985                 dev_err(hdev->dev,
986                         "failed resetting sob group - sob base %u, count %u",
987                         hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);
988
989         kref_init(&hw_sob_group->kref);
990 }
991
992 static void gaudi_sob_group_reset_error(struct kref *ref)
993 {
994         struct gaudi_hw_sob_group *hw_sob_group =
995                 container_of(ref, struct gaudi_hw_sob_group, kref);
996         struct hl_device *hdev = hw_sob_group->hdev;
997
998         dev_crit(hdev->dev,
999                 "SOB release shouldn't be called here, base_sob_id: %d\n",
1000                 hw_sob_group->base_sob_id);
1001 }
1002
1003 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1004 {
1005         struct gaudi_collective_properties *prop;
1006         int i;
1007
1008         prop = &gaudi->collective_props;
1009
1010         memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1011
1012         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1013                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1014                         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1015                                         BIT(i % HL_MAX_SOBS_PER_MONITOR);
1016         /* Set collective engine bit */
1017         prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1018                                 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1019 }
1020
1021 static int gaudi_collective_init(struct hl_device *hdev)
1022 {
1023         u32 i, sob_id, reserved_sobs_per_group;
1024         struct gaudi_collective_properties *prop;
1025         struct gaudi_device *gaudi;
1026
1027         gaudi = hdev->asic_specific;
1028         prop = &gaudi->collective_props;
1029         sob_id = hdev->asic_prop.collective_first_sob;
1030
1031         /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1032         reserved_sobs_per_group =
1033                 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1034
1035         /* Init SOB groups */
1036         for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1037                 prop->hw_sob_group[i].hdev = hdev;
1038                 prop->hw_sob_group[i].base_sob_id = sob_id;
1039                 sob_id += reserved_sobs_per_group;
1040                 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1041         }
1042
1043         for (i = 0 ; i < QMAN_STREAMS; i++) {
1044                 prop->next_sob_group_val[i] = 1;
1045                 prop->curr_sob_group_idx[i] = 0;
1046                 gaudi_collective_map_sobs(hdev, i);
1047         }
1048
1049         gaudi_collective_mstr_sob_mask_set(gaudi);
1050
1051         return 0;
1052 }
1053
1054 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1055 {
1056         struct gaudi_device *gaudi = hdev->asic_specific;
1057         struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1058
1059         kref_put(&cprop->hw_sob_group[sob_group].kref,
1060                                         gaudi_sob_group_hw_reset);
1061 }
1062
1063 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1064                 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1065 {
1066         u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1067         struct gaudi_collective_properties *cprop;
1068         struct hl_gen_wait_properties wait_prop;
1069         struct hl_sync_stream_properties *prop;
1070         struct gaudi_device *gaudi;
1071
1072         gaudi = hdev->asic_specific;
1073         cprop = &gaudi->collective_props;
1074         queue_id = job->hw_queue_id;
1075         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1076
1077         master_sob_base =
1078                 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1079         master_monitor = prop->collective_mstr_mon_id[0];
1080
1081         cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1082
1083         dev_dbg(hdev->dev,
1084                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1085                 master_sob_base, cprop->mstr_sob_mask[0],
1086                 cprop->next_sob_group_val[stream],
1087                 master_monitor, queue_id);
1088
1089         wait_prop.data = (void *) job->patched_cb;
1090         wait_prop.sob_base = master_sob_base;
1091         wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1092         wait_prop.sob_val = cprop->next_sob_group_val[stream];
1093         wait_prop.mon_id = master_monitor;
1094         wait_prop.q_idx = queue_id;
1095         wait_prop.size = cb_size;
1096         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1097
1098         master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1099         master_monitor = prop->collective_mstr_mon_id[1];
1100
1101         dev_dbg(hdev->dev,
1102                 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1103                 master_sob_base, cprop->mstr_sob_mask[1],
1104                 cprop->next_sob_group_val[stream],
1105                 master_monitor, queue_id);
1106
1107         wait_prop.sob_base = master_sob_base;
1108         wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1109         wait_prop.mon_id = master_monitor;
1110         wait_prop.size = cb_size;
1111         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1112 }
1113
1114 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1115                 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1116 {
1117         struct hl_gen_wait_properties wait_prop;
1118         struct hl_sync_stream_properties *prop;
1119         u32 queue_id, cb_size = 0;
1120
1121         queue_id = job->hw_queue_id;
1122         prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1123
 1124         /* Add to the wait CBs using the slave monitor */
1125         wait_prop.data = (void *) job->user_cb;
1126         wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1127         wait_prop.sob_mask = 0x1;
1128         wait_prop.sob_val = cs_cmpl->sob_val;
1129         wait_prop.mon_id = prop->collective_slave_mon_id;
1130         wait_prop.q_idx = queue_id;
1131         wait_prop.size = cb_size;
1132
1133         dev_dbg(hdev->dev,
1134                 "Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
1135                 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1136                 prop->collective_slave_mon_id, queue_id);
1137
1138         cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1139
1140         dev_dbg(hdev->dev,
1141                 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1142                 prop->collective_sob_id, queue_id);
1143
1144         cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1145                         prop->collective_sob_id, cb_size, false);
1146 }
1147
1148 static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
1149 {
1150         struct hl_cs_compl *signal_cs_cmpl =
1151                 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1152         struct hl_cs_compl *cs_cmpl =
1153                 container_of(cs->fence, struct hl_cs_compl, base_fence);
1154         struct gaudi_collective_properties *cprop;
1155         u32 stream, queue_id, sob_group_offset;
1156         struct gaudi_device *gaudi;
1157         struct hl_device *hdev;
1158         struct hl_cs_job *job;
1159         struct hl_ctx *ctx;
1160
1161         ctx = cs->ctx;
1162         hdev = ctx->hdev;
1163         gaudi = hdev->asic_specific;
1164         cprop = &gaudi->collective_props;
1165
1166         /* copy the SOB id and value of the signal CS */
1167         cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1168         cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1169
1170         /* Calculate the stream from collective master queue (1st job) */
1171         job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1172         stream = job->hw_queue_id % 4;
1173         sob_group_offset =
1174                 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1175
1176         list_for_each_entry(job, &cs->job_list, cs_node) {
1177                 queue_id = job->hw_queue_id;
1178
1179                 if (hdev->kernel_queues[queue_id].collective_mode ==
1180                                 HL_COLLECTIVE_MASTER)
1181                         gaudi_collective_master_init_job(hdev, job, stream,
1182                                                 sob_group_offset);
1183                 else
1184                         gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1185         }
1186
1187         cs_cmpl->sob_group = sob_group_offset;
1188
1189         /* Handle sob group kref and wraparound */
1190         kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1191         cprop->next_sob_group_val[stream]++;
1192
1193         if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1194                 /*
1195                  * Decrement as we reached the max value.
1196                  * The release function won't be called here as we've
1197                  * just incremented the refcount.
1198                  */
1199                 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1200                                 gaudi_sob_group_reset_error);
1201                 cprop->next_sob_group_val[stream] = 1;
1202                 /* only two SOBs are currently in use */
1203                 cprop->curr_sob_group_idx[stream] =
1204                         (cprop->curr_sob_group_idx[stream] + 1) &
1205                                                         (HL_RSVD_SOBS - 1);
1206
1207                 gaudi_collective_map_sobs(hdev, stream);
1208
1209                 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1210                                 cprop->curr_sob_group_idx[stream], stream);
1211         }
1212
1213         /* Increment kref since all slave queues are now waiting on it */
1214         kref_get(&cs_cmpl->hw_sob->kref);
1215         /*
1216          * Must put the signal fence after the SOB refcnt increment so
1217          * the SOB refcnt won't turn 0 and reset the SOB before the
1218          * wait CS was submitted.
1219          */
1220         mb();
1221         hl_fence_put(cs->signal_fence);
1222         cs->signal_fence = NULL;
1223 }
1224
1225 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1226                 struct hl_ctx *ctx, struct hl_cs *cs,
1227                 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
1228 {
1229         struct hw_queue_properties *hw_queue_prop;
1230         struct hl_cs_counters_atomic *cntr;
1231         struct hl_cs_job *job;
1232         struct hl_cb *cb;
1233         u32 cb_size;
1234         bool patched_cb;
1235
1236         cntr = &hdev->aggregated_cs_counters;
1237
1238         if (mode == HL_COLLECTIVE_MASTER) {
1239                 /* CB size of collective master queue contains
1240                  * 4 msg short packets for monitor 1 configuration
1241                  * 1 fence packet
1242                  * 4 msg short packets for monitor 2 configuration
1243                  * 1 fence packet
1244                  * 2 msg prot packets for completion and MSI-X
1245                  */
1246                 cb_size = sizeof(struct packet_msg_short) * 8 +
1247                                 sizeof(struct packet_fence) * 2 +
1248                                 sizeof(struct packet_msg_prot) * 2;
1249                 patched_cb = true;
1250         } else {
1251                 /* CB size of collective slave queues contains
1252                  * 4 msg short packets for monitor configuration
1253                  * 1 fence packet
1254                  * 1 additional msg short packet for sob signal
1255                  */
1256                 cb_size = sizeof(struct packet_msg_short) * 5 +
1257                                 sizeof(struct packet_fence);
1258                 patched_cb = false;
1259         }
1260
1261         hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1262         job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1263         if (!job) {
1264                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1265                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1266                 dev_err(hdev->dev, "Failed to allocate a new job\n");
1267                 return -ENOMEM;
1268         }
1269
 1270         /* Allocate an internal mapped CB for non-patched CBs */
1271         cb = hl_cb_kernel_create(hdev, cb_size,
1272                         hdev->mmu_enable && !patched_cb);
1273         if (!cb) {
1274                 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1275                 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1276                 kfree(job);
1277                 return -EFAULT;
1278         }
1279
1280         job->id = 0;
1281         job->cs = cs;
1282         job->user_cb = cb;
1283         atomic_inc(&job->user_cb->cs_cnt);
1284         job->user_cb_size = cb_size;
1285         job->hw_queue_id = queue_id;
1286
1287         /*
 1288          * No need for parsing; the user CB is the patched CB.
 1289          * We call hl_cb_destroy() for two reasons: we don't need the CB
 1290          * in the CB idr anymore, and we need to decrement its refcount,
 1291          * which was incremented inside hl_cb_kernel_create().
1292          */
1293         if (patched_cb)
1294                 job->patched_cb = job->user_cb;
1295         else
1296                 job->patched_cb = NULL;
1297
1298         job->job_cb_size = job->user_cb_size;
1299         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
1300
 1301         /* Increment refcount since for external queues we get a completion */
1302         if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1303                 cs_get(cs);
1304
1305         cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1306
1307         list_add_tail(&job->cs_node, &cs->job_list);
1308
1309         hl_debugfs_add_job(hdev, job);
1310
1311         return 0;
1312 }
1313
1314 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1315                 struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
1316                 u32 collective_engine_id)
1317 {
1318         struct gaudi_device *gaudi = hdev->asic_specific;
1319         struct hw_queue_properties *hw_queue_prop;
1320         u32 queue_id, collective_queue, num_jobs;
1321         u32 stream, nic_queue, nic_idx = 0;
1322         bool skip;
1323         int i, rc = 0;
1324
1325         /* Verify wait queue id is configured as master */
1326         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1327         if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1328                 dev_err(hdev->dev,
1329                         "Queue %d is not configured as collective master\n",
1330                         wait_queue_id);
1331                 return -EINVAL;
1332         }
1333
1334         /* Verify engine id is supported */
1335         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1336                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1337                 dev_err(hdev->dev,
1338                         "Collective wait does not support engine %u\n",
1339                         collective_engine_id);
1340                 return -EINVAL;
1341         }
1342
1343         stream = wait_queue_id % 4;
1344
1345         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1346                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1347         else
1348                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1349
1350         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1351         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1352
1353         /* The first job goes to the collective master queue; it will wait
1354          * for the collective slave queues to finish execution.
1355          * The synchronization is done using two monitors:
1356          * the first monitor for NICs 0-7, the second for NICs 8-9 and the
1357          * reduction engine (DMA5/TPC7).
1358          *
1359          * The rest of the jobs go to the collective slave queues, which will
1360          * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1361          */
1362         for (i = 0 ; i < num_jobs ; i++) {
1363                 if (i == 0) {
1364                         queue_id = wait_queue_id;
1365                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1366                                 HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
1367                 } else {
1368                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1369                                 skip = !(gaudi->hw_cap_initialized &
1370                                         BIT(HW_CAP_NIC_SHIFT + nic_idx));
1374
1375                                 queue_id = nic_queue;
1376                                 nic_queue += 4;
1377                                 nic_idx++;
1378
1379                                 if (skip)
1380                                         continue;
1381                         } else {
1382                                 queue_id = collective_queue;
1383                         }
1384
1385                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1386                                 HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
1387                 }
1388
1389                 if (rc)
1390                         return rc;
1391         }
1392
1393         return rc;
1394 }
1395
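/*
 * Late initialization: reads the cpucp info, disables the NIC ports that are
 * not usable on a PCI card, enables PCI access from the device CPU, fetches
 * the PSOC frequency, clears the MMU page tables area and initializes the TPC
 * memories and the collective-wait infrastructure.
 */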
1396 static int gaudi_late_init(struct hl_device *hdev)
1397 {
1398         struct gaudi_device *gaudi = hdev->asic_specific;
1399         int rc;
1400
1401         rc = gaudi->cpucp_info_get(hdev);
1402         if (rc) {
1403                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1404                 return rc;
1405         }
1406
1407         if ((hdev->card_type == cpucp_card_type_pci) &&
1408                         (hdev->nic_ports_mask & 0x3)) {
1409                 dev_info(hdev->dev,
1410                         "PCI card detected, only 8 ports are enabled\n");
1411                 hdev->nic_ports_mask &= ~0x3;
1412
1413                 /* Stop and disable unused NIC QMANs */
1414                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1415                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1416                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1417
1418                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1419                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1420                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1421
1422                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1423                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1424
1425                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1426         }
1427
1428         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1429         if (rc) {
1430                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1431                 return rc;
1432         }
1433
1434         rc = gaudi_fetch_psoc_frequency(hdev);
1435         if (rc) {
1436                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1437                 goto disable_pci_access;
1438         }
1439
1440         rc = gaudi_mmu_clear_pgt_range(hdev);
1441         if (rc) {
1442                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1443                 goto disable_pci_access;
1444         }
1445
1446         rc = gaudi_init_tpc_mem(hdev);
1447         if (rc) {
1448                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1449                 goto disable_pci_access;
1450         }
1451
1452         rc = gaudi_collective_init(hdev);
1453         if (rc) {
1454                 dev_err(hdev->dev, "Failed to init collective\n");
1455                 goto disable_pci_access;
1456         }
1457
1458         return 0;
1459
1460 disable_pci_access:
1461         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1462
1463         return rc;
1464 }
1465
1466 static void gaudi_late_fini(struct hl_device *hdev)
1467 {
1468         const struct hwmon_channel_info **channel_info_arr;
1469         int i = 0;
1470
1471         if (!hdev->hl_chip_info->info)
1472                 return;
1473
1474         channel_info_arr = hdev->hl_chip_info->info;
1475
1476         while (channel_info_arr[i]) {
1477                 kfree(channel_info_arr[i]->config);
1478                 kfree(channel_info_arr[i]);
1479                 i++;
1480         }
1481
1482         kfree(channel_info_arr);
1483
1484         hdev->hl_chip_info->info = NULL;
1485 }
1486
1487 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1488 {
1489         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1490         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1491         int i, j, rc = 0;
1492
1493         /*
1494          * The device CPU works with 40-bit addresses, where bit 39 must be
1495          * set to '1' when accessing the host.
1496          * Bits 49:39 of the full host address are saved for a later
1497          * configuration of the HW to perform the extension to 50 bits.
1498          * Because a single HW register holds the extension bits, these bits
1499          * must be identical across the entire allocated range.
1500          */
1501
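        /*
         * Retry the allocation until the extension bits (bits 49:39) of the
         * start and end addresses of the range are identical, as required by
         * the single extension register described above.
         */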
1502         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1503                 virt_addr_arr[i] =
1504                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1505                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1506                                                 &dma_addr_arr[i],
1507                                                 GFP_KERNEL | __GFP_ZERO);
1508                 if (!virt_addr_arr[i]) {
1509                         rc = -ENOMEM;
1510                         goto free_dma_mem_arr;
1511                 }
1512
1513                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1514                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1515                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1516                         break;
1517         }
1518
1519         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1520                 dev_err(hdev->dev,
1521                         "MSBs of CPU accessible DMA memory are not identical across the allocated range\n");
1522                 rc = -EFAULT;
1523                 goto free_dma_mem_arr;
1524         }
1525
1526         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1527         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1528         hdev->cpu_pci_msb_addr =
1529                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1530
1531         if (!hdev->asic_prop.fw_security_enabled)
1532                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1533
1534 free_dma_mem_arr:
1535         for (j = 0 ; j < i ; j++)
1536                 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1537                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1538                                                 virt_addr_arr[j],
1539                                                 dma_addr_arr[j]);
1540
1541         return rc;
1542 }
1543
1544 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1545 {
1546         struct gaudi_device *gaudi = hdev->asic_specific;
1547         struct gaudi_internal_qman_info *q;
1548         u32 i;
1549
1550         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1551                 q = &gaudi->internal_qmans[i];
1552                 if (!q->pq_kernel_addr)
1553                         continue;
1554                 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1555                                                         q->pq_kernel_addr,
1556                                                         q->pq_dma_addr);
1557         }
1558 }
1559
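/*
 * Allocate a coherent PQ buffer for every internal (on-device) queue. The PQ
 * size depends on the engine type behind the queue: HBM DMA, MME, TPC or NIC.
 */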
1560 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1561 {
1562         struct gaudi_device *gaudi = hdev->asic_specific;
1563         struct gaudi_internal_qman_info *q;
1564         int rc, i;
1565
1566         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1567                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1568                         continue;
1569
1570                 q = &gaudi->internal_qmans[i];
1571
1572                 switch (i) {
1573                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1574                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1575                         break;
1576                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1577                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1578                         break;
1579                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1580                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1581                         break;
1582                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1583                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1584                         break;
1585                 default:
1586                         dev_err(hdev->dev, "Bad internal queue index %d\n", i);
1587                         rc = -EINVAL;
1588                         goto free_internal_qmans_pq_mem;
1589                 }
1590
1591                 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1592                                                 hdev, q->pq_size,
1593                                                 &q->pq_dma_addr,
1594                                                 GFP_KERNEL | __GFP_ZERO);
1595                 if (!q->pq_kernel_addr) {
1596                         rc = -ENOMEM;
1597                         goto free_internal_qmans_pq_mem;
1598                 }
1599         }
1600
1601         return 0;
1602
1603 free_internal_qmans_pq_mem:
1604         gaudi_free_internal_qmans_pq_mem(hdev);
1605         return rc;
1606 }
1607
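/*
 * Describe the PCI-accessible memory regions (CFG, SRAM, DRAM and SP SRAM):
 * device base address, size and the BAR through which each region is exposed.
 */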
1608 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1609 {
1610         struct asic_fixed_properties *prop = &hdev->asic_prop;
1611         struct pci_mem_region *region;
1612
1613         /* CFG */
1614         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1615         region->region_base = CFG_BASE;
1616         region->region_size = CFG_SIZE;
1617         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1618         region->bar_size = CFG_BAR_SIZE;
1619         region->bar_id = CFG_BAR_ID;
1620         region->used = 1;
1621
1622         /* SRAM */
1623         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1624         region->region_base = SRAM_BASE_ADDR;
1625         region->region_size = SRAM_SIZE;
1626         region->offset_in_bar = 0;
1627         region->bar_size = SRAM_BAR_SIZE;
1628         region->bar_id = SRAM_BAR_ID;
1629         region->used = 1;
1630
1631         /* DRAM */
1632         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1633         region->region_base = DRAM_PHYS_BASE;
1634         region->region_size = hdev->asic_prop.dram_size;
1635         region->offset_in_bar = 0;
1636         region->bar_size = prop->dram_pci_bar_size;
1637         region->bar_id = HBM_BAR_ID;
1638         region->used = 1;
1639
1640         /* SP SRAM */
1641         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1642         region->region_base = PSOC_SCRATCHPAD_ADDR;
1643         region->region_size = PSOC_SCRATCHPAD_SIZE;
1644         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1645         region->bar_size = CFG_BAR_SIZE;
1646         region->bar_id = CFG_BAR_ID;
1647         region->used = 1;
1648 }
1649
1650 static int gaudi_sw_init(struct hl_device *hdev)
1651 {
1652         struct gaudi_device *gaudi;
1653         u32 i, event_id = 0;
1654         int rc;
1655
1656         /* Allocate device structure */
1657         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1658         if (!gaudi)
1659                 return -ENOMEM;
1660
1661         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1662                 if (gaudi_irq_map_table[i].valid) {
1663                         if (event_id == GAUDI_EVENT_SIZE) {
1664                                 dev_err(hdev->dev,
1665                                         "Event array exceeds the limit of %u events\n",
1666                                         GAUDI_EVENT_SIZE);
1667                                 rc = -EINVAL;
1668                                 goto free_gaudi_device;
1669                         }
1670
1671                         gaudi->events[event_id++] =
1672                                         gaudi_irq_map_table[i].fc_id;
1673                 }
1674         }
1675
1676         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1677
1678         gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1679
1680         hdev->asic_specific = gaudi;
1681
1682         /* Create DMA pool for small allocations */
1683         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1684                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1685         if (!hdev->dma_pool) {
1686                 dev_err(hdev->dev, "failed to create DMA pool\n");
1687                 rc = -ENOMEM;
1688                 goto free_gaudi_device;
1689         }
1690
1691         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1692         if (rc)
1693                 goto free_dma_pool;
1694
1695         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1696         if (!hdev->cpu_accessible_dma_pool) {
1697                 dev_err(hdev->dev,
1698                         "Failed to create CPU accessible DMA pool\n");
1699                 rc = -ENOMEM;
1700                 goto free_cpu_dma_mem;
1701         }
1702
1703         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1704                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1705                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1706         if (rc) {
1707                 dev_err(hdev->dev,
1708                         "Failed to add memory to CPU accessible DMA pool\n");
1709                 rc = -EFAULT;
1710                 goto free_cpu_accessible_dma_pool;
1711         }
1712
1713         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1714         if (rc)
1715                 goto free_cpu_accessible_dma_pool;
1716
1717         spin_lock_init(&gaudi->hw_queues_lock);
1718         mutex_init(&gaudi->clk_gate_mutex);
1719
1720         hdev->supports_sync_stream = true;
1721         hdev->supports_coresight = true;
1722         hdev->supports_staged_submission = true;
1723
1724         gaudi_set_pci_memory_regions(hdev);
1725
1726         return 0;
1727
1728 free_cpu_accessible_dma_pool:
1729         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1730 free_cpu_dma_mem:
1731         if (!hdev->asic_prop.fw_security_enabled)
1732                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1733                                         hdev->cpu_pci_msb_addr);
1734         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1735                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1736                         hdev->cpu_accessible_dma_mem,
1737                         hdev->cpu_accessible_dma_address);
1738 free_dma_pool:
1739         dma_pool_destroy(hdev->dma_pool);
1740 free_gaudi_device:
1741         kfree(gaudi);
1742         return rc;
1743 }
1744
1745 static int gaudi_sw_fini(struct hl_device *hdev)
1746 {
1747         struct gaudi_device *gaudi = hdev->asic_specific;
1748
1749         gaudi_free_internal_qmans_pq_mem(hdev);
1750
1751         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1752
1753         if (!hdev->asic_prop.fw_security_enabled)
1754                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1755                                         hdev->cpu_pci_msb_addr);
1756
1757         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1758                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1759                         hdev->cpu_accessible_dma_mem,
1760                         hdev->cpu_accessible_dma_address);
1761
1762         dma_pool_destroy(hdev->dma_pool);
1763
1764         mutex_destroy(&gaudi->clk_gate_mutex);
1765
1766         kfree(gaudi);
1767
1768         return 0;
1769 }
1770
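/*
 * Single-MSI interrupt handler: one vector serves all completion queues and
 * the event queue, so each of them is checked on every interrupt.
 */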
1771 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1772 {
1773         struct hl_device *hdev = arg;
1774         int i;
1775
1776         if (hdev->disabled)
1777                 return IRQ_HANDLED;
1778
1779         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1780                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1781
1782         hl_irq_handler_eq(irq, &hdev->event_queue);
1783
1784         return IRQ_HANDLED;
1785 }
1786
1787 /*
1788  * For backward compatibility, new MSI interrupts should be set after the
1789  * existing CPU and NIC interrupts.
1790  */
1791 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1792                                 bool cpu_eq)
1793 {
1794         int msi_vec;
1795
1796         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1797                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1798                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1799
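        /*
         * Completion queue indices below GAUDI_EVENT_QUEUE_MSI_IDX map 1:1 to
         * MSI vectors and the CPU event queue uses GAUDI_EVENT_QUEUE_MSI_IDX
         * itself; any other index is placed after the NIC vectors.
         */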
1800         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1801                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1802
1803         return pci_irq_vector(hdev->pdev, msi_vec);
1804 }
1805
1806 static int gaudi_enable_msi_single(struct hl_device *hdev)
1807 {
1808         int rc, irq;
1809
1810         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1811
1812         irq = gaudi_pci_irq_vector(hdev, 0, false);
1813         rc = request_irq(irq, gaudi_irq_handler_single, 0,
1814                         "gaudi single msi", hdev);
1815         if (rc)
1816                 dev_err(hdev->dev,
1817                         "Failed to request single MSI IRQ\n");
1818
1819         return rc;
1820 }
1821
1822 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1823 {
1824         int cq_cnt = hdev->asic_prop.completion_queues_count;
1825         int rc, i, irq_cnt_init, irq;
1826
1827         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1828                 irq = gaudi_pci_irq_vector(hdev, i, false);
1829                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1830                                 &hdev->completion_queue[i]);
1831                 if (rc) {
1832                         dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1833                         goto free_irqs;
1834                 }
1835         }
1836
1837         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1838         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1839                                 &hdev->event_queue);
1840         if (rc) {
1841                 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1842                 goto free_irqs;
1843         }
1844
1845         return 0;
1846
1847 free_irqs:
1848         for (i = 0 ; i < irq_cnt_init ; i++)
1849                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1850                                 &hdev->completion_queue[i]);
1851         return rc;
1852 }
1853
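/*
 * Enable MSI: allocate the IRQ vectors and register either the single shared
 * handler or the per-queue handlers, depending on how many vectors were
 * granted.
 */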
1854 static int gaudi_enable_msi(struct hl_device *hdev)
1855 {
1856         struct gaudi_device *gaudi = hdev->asic_specific;
1857         int rc;
1858
1859         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1860                 return 0;
1861
1862         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
1863         if (rc < 0) {
1864                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1865                 return rc;
1866         }
1867
1868         if (rc < NUMBER_OF_INTERRUPTS) {
1869                 gaudi->multi_msi_mode = false;
1870                 rc = gaudi_enable_msi_single(hdev);
1871         } else {
1872                 gaudi->multi_msi_mode = true;
1873                 rc = gaudi_enable_msi_multi(hdev);
1874         }
1875
1876         if (rc)
1877                 goto free_pci_irq_vectors;
1878
1879         gaudi->hw_cap_initialized |= HW_CAP_MSI;
1880
1881         return 0;
1882
1883 free_pci_irq_vectors:
1884         pci_free_irq_vectors(hdev->pdev);
1885         return rc;
1886 }
1887
1888 static void gaudi_sync_irqs(struct hl_device *hdev)
1889 {
1890         struct gaudi_device *gaudi = hdev->asic_specific;
1891         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1892
1893         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1894                 return;
1895
1896         /* Wait for all pending IRQs to finish */
1897         if (gaudi->multi_msi_mode) {
1898                 for (i = 0 ; i < cq_cnt ; i++)
1899                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1900
1901                 synchronize_irq(gaudi_pci_irq_vector(hdev,
1902                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
1903                                                 true));
1904         } else {
1905                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1906         }
1907 }
1908
1909 static void gaudi_disable_msi(struct hl_device *hdev)
1910 {
1911         struct gaudi_device *gaudi = hdev->asic_specific;
1912         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1913
1914         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1915                 return;
1916
1917         gaudi_sync_irqs(hdev);
1918
1919         if (gaudi->multi_msi_mode) {
1920                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1921                                                 true);
1922                 free_irq(irq, &hdev->event_queue);
1923
1924                 for (i = 0 ; i < cq_cnt ; i++) {
1925                         irq = gaudi_pci_irq_vector(hdev, i, false);
1926                         free_irq(irq, &hdev->completion_queue[i]);
1927                 }
1928         } else {
1929                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1930         }
1931
1932         pci_free_irq_vectors(hdev->pdev);
1933
1934         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1935 }
1936
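/*
 * Enable SRAM scrambling on all NIF/SIF routers and DMA interfaces, unless
 * the firmware is secured, the firmware already enabled it, it was already
 * configured, or SRAM scrambling is disabled for this device.
 */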
1937 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1938 {
1939         struct gaudi_device *gaudi = hdev->asic_specific;
1940
1941         if (hdev->asic_prop.fw_security_enabled)
1942                 return;
1943
1944         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
1945                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
1946                 return;
1947
1948         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1949                 return;
1950
1951         if (!hdev->sram_scrambler_enable)
1952                 return;
1953
1954         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1955                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1956         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1957                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1958         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1959                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1960         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1961                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1962         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1963                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1964         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1965                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1966         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1967                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1968         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1969                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1970
1971         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1972                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1973         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1974                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1975         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1976                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1977         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1978                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1979         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1980                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1981         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1982                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1983         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1984                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1985         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1986                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1987
1988         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1989                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1990         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1991                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1992         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1993                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1994         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1995                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1996         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1997                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1998         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1999                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2000         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2001                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2002         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2003                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2004
2005         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2006 }
2007
2008 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2009 {
2010         struct gaudi_device *gaudi = hdev->asic_specific;
2011
2012         if (hdev->asic_prop.fw_security_enabled)
2013                 return;
2014
2015         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2016                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2017                 return;
2018
2019         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2020                 return;
2021
2022         if (!hdev->dram_scrambler_enable)
2023                 return;
2024
2025         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2026                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2027         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2028                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2029         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2030                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2031         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2032                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2033         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2034                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2035         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2036                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2037         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2038                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2039         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2040                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2041
2042         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2043                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2044         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2045                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2046         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2047                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2048         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2049                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2050         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2051                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2052         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2053                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2054         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2055                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2056         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2057                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2058
2059         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2060                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2061         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2062                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2063         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2064                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2065         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2066                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2067         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2068                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2069         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2070                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2071         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2072                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2073         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2074                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2075
2076         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2077 }
2078
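/*
 * Configure the end-to-end (E2E) HBM and PCI credits of the SIF/NIF routers
 * and DMA interfaces and enable them. Skipped when the firmware is secured or
 * has already configured the credits, as reported in the boot-fit status.
 */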
2079 static void gaudi_init_e2e(struct hl_device *hdev)
2080 {
2081         if (hdev->asic_prop.fw_security_enabled)
2082                 return;
2083
2084         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2085                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2086                 return;
2087
2088         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2089         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2090         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2091         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2092
2093         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2094         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2095         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2096         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2097
2098         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2099         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2100         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2101         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2102
2103         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2104         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2105         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2106         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2107
2108         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2109         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2110         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2111         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2112
2113         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2114         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2115         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2116         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2117
2118         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2119         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2120         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2121         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2122
2123         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2124         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2125         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2126         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2127
2128         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2129         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2130         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2131         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2132
2133         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2134         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2135         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2136         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2137
2138         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2139         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2140         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2141         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2142
2143         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2144         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2145         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2146         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2147
2148         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2149         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2150         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2151         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2152
2153         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2154         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2155         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2156         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2157
2158         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2159         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2160         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2161         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2162
2163         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2164         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2165         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2166         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2167
2168         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2169         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2170         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2171         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2172
2173         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2174         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2175         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2176         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2177
2178         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2179         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2180         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2181         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2182
2183         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2184         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2185         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2186         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2187
2188         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2189         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2190         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2191         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2192
2193         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2194         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2195         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2196         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2197
2198         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2199         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2200         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2201         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2202
2203         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2204         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2205         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2206         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2207
2208         if (!hdev->dram_scrambler_enable) {
2209                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2210                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2211                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2212                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2213
2214                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2215                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2216                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2217                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2218
2219                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2220                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2221                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2222                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2223
2224                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2225                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2226                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2227                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2228
2229                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2230                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2231                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2232                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2233
2234                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2235                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2236                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2237                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2238
2239                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2240                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2241                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2242                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2243
2244                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2245                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2246                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2247                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2248
2249                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2250                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2251                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2252                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2253
2254                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2255                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2256                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2257                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2258
2259                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2260                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2261                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2262                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2263
2264                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2265                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2266                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2267                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2268
2269                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2270                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2271                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2272                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2273
2274                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2275                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2276                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2277                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2278
2279                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2280                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2281                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2282                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2283
2284                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2285                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2286                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2287                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2288
2289                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2290                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2291                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2292                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2293
2294                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2295                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2296                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2297                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2298
2299                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2300                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2301                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2302                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2303
2304                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2305                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2306                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2307                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2308
2309                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2310                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2311                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2312                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2313
2314                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2315                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2316                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2317                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2318
2319                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2320                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2321                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2322                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2323
2324                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2325                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2326                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2327                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2328         }
2329
2330         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2331                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2332         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2333                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2334
2335         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2336                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2337         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2338                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2339
2340         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2341                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2342         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2343                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2344
2345         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2346                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2347         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2348                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2349
2350         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2351                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2352         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2353                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2354
2355         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2356                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2357         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2358                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2359
2360         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2361                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2362         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2363                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2364
2365         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2366                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2367         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2368                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2369
2370         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2371                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2372         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2373                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2374
2375         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2376                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2377         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2378                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2379
2380         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2381                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2382         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2383                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2384
2385         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2386                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2387         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2388                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2389
2390         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2391                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2392         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2393                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2394
2395         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2396                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2397         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2398                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2399
2400         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2401                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2402         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2403                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2404
2405         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2406                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2407         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2408                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2409
2410         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2411                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2412         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2413                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2414
2415         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2416                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2417         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2418                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2419
2420         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2421                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2422         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2423                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2424
2425         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2426                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2427         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2428                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2429
2430         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2431                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2432         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2433                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2434
2435         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2436                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2437         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2438                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2439
2440         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2441                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2442         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2443                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2444
2445         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2446                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2447         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2448                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2449 }
2450
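/*
 * Program the HBM read/write credit counts of the four DMA interfaces and
 * enable read and write credits on both HBM channels of each interface.
 */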
2451 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2452 {
2453         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2454
2455         if (hdev->asic_prop.fw_security_enabled)
2456                 return;
2457
2458         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2459                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2460                 return;
2461
2462         hbm0_wr = 0x33333333;
2463         hbm0_rd = 0x77777777;
2464         hbm1_wr = 0x55555555;
2465         hbm1_rd = 0xDDDDDDDD;
2466
2467         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2468         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2469         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2470         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2471
2472         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2473         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2474         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2475         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2476
2477         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2478         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2479         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2480         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2481
2482         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2483         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2484         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2485         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2486
2487         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2488                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2489                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2490         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2491                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2492                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2493         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2494                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2495                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2496         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2497                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2498                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2499
2500         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2501                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2502                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2503         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2504                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2505                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2506         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2507                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2508                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2509         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2510                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2511                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2512 }
2513
2514 static void gaudi_init_golden_registers(struct hl_device *hdev)
2515 {
2516         u32 tpc_offset;
2517         int tpc_id, i;
2518
2519         gaudi_init_e2e(hdev);
2520         gaudi_init_hbm_cred(hdev);
2521
2522         for (tpc_id = 0, tpc_offset = 0;
2523                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2524                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2525                 /* Mask all arithmetic interrupts from TPC */
2526                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2527                 /* Set 16 cache lines */
2528                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2529                                 ICACHE_FETCH_LINE_NUM, 2);
2530         }
2531
2532         /* Make sure the first 128 bytes in SRAM are 0 for Tensor DMA */
2533         for (i = 0 ; i < 128 ; i += 8)
2534                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2535
2536         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2537         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2538         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2539         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2540 }
2541
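/*
 * Initialize a single stream of a PCI DMA QMAN: PQ base and size, LDMA
 * offsets, monitor/SOB message base addresses and, once per QMAN, the error
 * reporting (RAZWI IRQ), arbitration watchdog and protection configuration.
 */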
2542 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2543                                         int qman_id, dma_addr_t qman_pq_addr)
2544 {
2545         struct cpu_dyn_regs *dyn_regs =
2546                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2547         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2548         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2549         u32 q_off, dma_qm_offset;
2550         u32 dma_qm_err_cfg, irq_handler_offset;
2551
2552         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2553
2554         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2555                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2556         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2557                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2558         so_base_en_lo = lower_32_bits(CFG_BASE +
2559                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2560         so_base_en_hi = upper_32_bits(CFG_BASE +
2561                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2562         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2563                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2564         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2565                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2566         so_base_ws_lo = lower_32_bits(CFG_BASE +
2567                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2568         so_base_ws_hi = upper_32_bits(CFG_BASE +
2569                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2570
2571         q_off = dma_qm_offset + qman_id * 4;
2572
2573         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2574         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2575
2576         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2577         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2578         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2579
2580         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2581         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2582                                                         QMAN_LDMA_SRC_OFFSET);
2583         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2584                                                         QMAN_LDMA_DST_OFFSET);
2585
2586         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2587         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2588         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2589         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2590         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2591         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2592         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2593         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2594
2595         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2596
2597         /* The following configuration is needed only once per QMAN */
2598         if (qman_id == 0) {
2599                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2600                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2601                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2602
2603                 /* Configure RAZWI IRQ */
2604                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2605                 if (hdev->stop_on_err)
2606                         dma_qm_err_cfg |=
2607                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2608
2609                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2610
2611                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2612                         lower_32_bits(CFG_BASE + irq_handler_offset));
2613                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2614                         upper_32_bits(CFG_BASE + irq_handler_offset));
2615
2616                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2617                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2618                                                                         dma_id);
2619
2620                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2621                                 QM_ARB_ERR_MSG_EN_MASK);
2622
2623                 /* Increase ARB WDT to support streams architecture */
2624                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2625                                 GAUDI_ARB_WDT_TIMEOUT);
2626
2627                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2628                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2629
2630                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2631         }
2632 }
2633
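/*
 * gaudi_init_dma_core - configure and enable a DMA core engine.
 *
 * Sets the read outstanding/size limits (maximum possible per the
 * physical size), applies the H3-2116 LBW workaround, routes error
 * messages to the interrupt handler, forces MMU bypass for the secured
 * channel and enables the core.
 */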
2634 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2635 {
2636         struct cpu_dyn_regs *dyn_regs =
2637                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2638         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2639         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2640         u32 irq_handler_offset;
2641
2642         /* Set to maximum possible according to physical size */
2643         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2644         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2645
2646         /* WA for H/W bug H3-2116 */
2647         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2648
2649         /* STOP_ON bit implies no completion of the operation in case of RAZWI */
2650         if (hdev->stop_on_err)
2651                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2652
2653         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2654
2655         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2656                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2657                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2658
2659         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2660                 lower_32_bits(CFG_BASE + irq_handler_offset));
2661         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2662                 upper_32_bits(CFG_BASE + irq_handler_offset));
2663
2664         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2665                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2666         WREG32(mmDMA0_CORE_PROT + dma_offset,
2667                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2668         /* If the channel is secured, it should be in MMU bypass mode */
2669         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2670                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2671         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2672 }
2673
2674 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2675                                 u32 enable_mask)
2676 {
2677         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2678
2679         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2680 }
2681
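/*
 * gaudi_init_pci_dma_qmans - initialize all PCI DMA QMANs and cores.
 *
 * For every PCI DMA channel, assigns a CQ and an MSI vector to each of
 * its streams (accounting for the CPU queue and the NIC IRQs), programs
 * the QMAN streams and the DMA core, and then enables the QMAN.
 */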
2682 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2683 {
2684         struct gaudi_device *gaudi = hdev->asic_specific;
2685         struct hl_hw_queue *q;
2686         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2687
2688         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2689                 return;
2690
2691         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2692                 dma_id = gaudi_dma_assignment[i];
2693                 /*
2694                  * For queues after the CPU Q, we need to add 1 to get the correct
2695                  * queue index. In addition, we need to account for the CPU EQ and
2696                  * the NIC IRQs in order to get the correct MSI register.
2697                  */
2698                 if (dma_id > 1) {
2699                         cpu_skip = 1;
2700                         nic_skip = NIC_NUMBER_OF_ENGINES;
2701                 } else {
2702                         cpu_skip = 0;
2703                         nic_skip = 0;
2704                 }
2705
2706                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2707                         q_idx = 4 * dma_id + j + cpu_skip;
2708                         q = &hdev->kernel_queues[q_idx];
2709                         q->cq_id = cq_id++;
2710                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2711                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2712                                                 q->bus_address);
2713                 }
2714
2715                 gaudi_init_dma_core(hdev, dma_id);
2716
2717                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2718         }
2719
2720         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2721 }
2722
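/*
 * gaudi_init_hbm_dma_qman - configure one stream of an HBM DMA QMAN.
 *
 * Streams 0-3 get a PQ taken from the matching internal queue and use
 * the CPDMA offsets. Stream 4 is the lower CP: it uses the LDMA offsets
 * and also carries the RAZWI error reporting, the arbiter watchdog and
 * the protection configuration. On DMA5 the west-south sync manager
 * bases are also programmed into CP_MSG_BASE 2/3 for the sync stream
 * collective.
 */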
2723 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2724                                         int qman_id, u64 qman_base_addr)
2725 {
2726         struct cpu_dyn_regs *dyn_regs =
2727                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2728         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2729         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2730         u32 dma_qm_err_cfg, irq_handler_offset;
2731         u32 q_off, dma_qm_offset;
2732
2733         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2734
2735         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2736                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2737         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2738                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2739         so_base_en_lo = lower_32_bits(CFG_BASE +
2740                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2741         so_base_en_hi = upper_32_bits(CFG_BASE +
2742                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2743         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2744                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2745         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2746                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2747         so_base_ws_lo = lower_32_bits(CFG_BASE +
2748                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2749         so_base_ws_hi = upper_32_bits(CFG_BASE +
2750                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2751
2752         q_off = dma_qm_offset + qman_id * 4;
2753
2754         if (qman_id < 4) {
2755                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2756                                         lower_32_bits(qman_base_addr));
2757                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2758                                         upper_32_bits(qman_base_addr));
2759
2760                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2761                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2762                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2763
2764                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2765                                                         QMAN_CPDMA_SIZE_OFFSET);
2766                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2767                                                         QMAN_CPDMA_SRC_OFFSET);
2768                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2769                                                         QMAN_CPDMA_DST_OFFSET);
2770         } else {
2771                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2772                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2773                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2774
2775                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2776                                                         QMAN_LDMA_SIZE_OFFSET);
2777                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2778                                                         QMAN_LDMA_SRC_OFFSET);
2779                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2780                                                         QMAN_LDMA_DST_OFFSET);
2781
2782                 /* Configure RAZWI IRQ */
2783                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2784                 if (hdev->stop_on_err)
2785                         dma_qm_err_cfg |=
2786                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2787
2788                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2789
2790                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2791                         lower_32_bits(CFG_BASE + irq_handler_offset));
2792                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2793                         upper_32_bits(CFG_BASE + irq_handler_offset));
2794
2795                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2796                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2797                                                                         dma_id);
2798
2799                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2800                                 QM_ARB_ERR_MSG_EN_MASK);
2801
2802                 /* Increase ARB WDT to support streams architecture */
2803                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2804                                 GAUDI_ARB_WDT_TIMEOUT);
2805
2806                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2807                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2808                                 QMAN_INTERNAL_MAKE_TRUSTED);
2809         }
2810
2811         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2812         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2813         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2814         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2815
2816         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2817         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2818                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2819                                 mtr_base_ws_lo);
2820                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2821                                 mtr_base_ws_hi);
2822                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2823                                 so_base_ws_lo);
2824                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2825                                 so_base_ws_hi);
2826         }
2827 }
2828
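/*
 * gaudi_init_hbm_dma_qmans - initialize all HBM DMA QMANs and cores.
 *
 * Programs the four upper-CP streams of every HBM DMA channel from the
 * internal QMAN info, initializes the lower CP (stream 4), configures
 * the DMA core and enables the QMAN.
 */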
2829 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2830 {
2831         struct gaudi_device *gaudi = hdev->asic_specific;
2832         struct gaudi_internal_qman_info *q;
2833         u64 qman_base_addr;
2834         int i, j, dma_id, internal_q_index;
2835
2836         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2837                 return;
2838
2839         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2840                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2841
2842                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2843                          /*
2844                           * Add the CPU queue in order to get the correct queue
2845                           * number, as all internal queues are placed after it
2846                           */
2847                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2848
2849                         q = &gaudi->internal_qmans[internal_q_index];
2850                         qman_base_addr = (u64) q->pq_dma_addr;
2851                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2852                                                 qman_base_addr);
2853                 }
2854
2855                 /* Initializing lower CP for HBM DMA QMAN */
2856                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2857
2858                 gaudi_init_dma_core(hdev, dma_id);
2859
2860                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2861         }
2862
2863         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2864 }
2865
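/*
 * gaudi_init_mme_qman - configure one stream of an MME QMAN.
 *
 * Streams 0-3 get a PQ taken from the matching internal queue and use
 * the CPDMA offsets. Stream 4 (the lower CP) uses the LDMA offsets and
 * also carries the RAZWI error reporting, the arbiter watchdog and the
 * protection configuration.
 */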
2866 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2867                                         int qman_id, u64 qman_base_addr)
2868 {
2869         struct cpu_dyn_regs *dyn_regs =
2870                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2871         u32 mtr_base_lo, mtr_base_hi;
2872         u32 so_base_lo, so_base_hi;
2873         u32 irq_handler_offset;
2874         u32 q_off, mme_id;
2875         u32 mme_qm_err_cfg;
2876
2877         mtr_base_lo = lower_32_bits(CFG_BASE +
2878                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2879         mtr_base_hi = upper_32_bits(CFG_BASE +
2880                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2881         so_base_lo = lower_32_bits(CFG_BASE +
2882                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2883         so_base_hi = upper_32_bits(CFG_BASE +
2884                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2885
2886         q_off = mme_offset + qman_id * 4;
2887
2888         if (qman_id < 4) {
2889                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2890                                         lower_32_bits(qman_base_addr));
2891                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2892                                         upper_32_bits(qman_base_addr));
2893
2894                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2895                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2896                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2897
2898                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2899                                                         QMAN_CPDMA_SIZE_OFFSET);
2900                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2901                                                         QMAN_CPDMA_SRC_OFFSET);
2902                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2903                                                         QMAN_CPDMA_DST_OFFSET);
2904         } else {
2905                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2906                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2907                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2908
2909                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2910                                                         QMAN_LDMA_SIZE_OFFSET);
2911                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2912                                                         QMAN_LDMA_SRC_OFFSET);
2913                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2914                                                         QMAN_LDMA_DST_OFFSET);
2915
2916                 /* Configure RAZWI IRQ */
2917                 mme_id = mme_offset /
2918                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2919
2920                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2921                 if (hdev->stop_on_err)
2922                         mme_qm_err_cfg |=
2923                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2924
2925                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2926
2927                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2928                         lower_32_bits(CFG_BASE + irq_handler_offset));
2929                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2930                         upper_32_bits(CFG_BASE + irq_handler_offset));
2931
2932                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2933                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2934                                                                         mme_id);
2935
2936                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2937                                 QM_ARB_ERR_MSG_EN_MASK);
2938
2939                 /* Increase ARB WDT to support streams architecture */
2940                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2941                                 GAUDI_ARB_WDT_TIMEOUT);
2942
2943                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2944                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2945                                 QMAN_INTERNAL_MAKE_TRUSTED);
2946         }
2947
2948         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2949         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2950         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2951         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2952 }
2953
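/*
 * gaudi_init_mme_qmans - initialize both MME QMANs.
 *
 * Maps the internal MME queues to the two QMANs as described below,
 * initializes the lower CP of each QMAN and enables them.
 */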
2954 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2955 {
2956         struct gaudi_device *gaudi = hdev->asic_specific;
2957         struct gaudi_internal_qman_info *q;
2958         u64 qman_base_addr;
2959         u32 mme_offset;
2960         int i, internal_q_index;
2961
2962         if (gaudi->hw_cap_initialized & HW_CAP_MME)
2963                 return;
2964
2965         /*
2966          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2967          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2968          */
2969
2970         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2971
2972         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2973                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2974                 q = &gaudi->internal_qmans[internal_q_index];
2975                 qman_base_addr = (u64) q->pq_dma_addr;
2976                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2977                                         qman_base_addr);
2978                 if (i == 3)
2979                         mme_offset = 0;
2980         }
2981
2982         /* Initializing lower CP for MME QMANs */
2983         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2984         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2985         gaudi_init_mme_qman(hdev, 0, 4, 0);
2986
2987         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2988         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2989
2990         gaudi->hw_cap_initialized |= HW_CAP_MME;
2991 }
2992
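/*
 * gaudi_init_tpc_qman - configure one stream of a TPC QMAN.
 *
 * Streams 0-3 get a PQ taken from the matching internal queue and use
 * the CPDMA offsets. Stream 4 (the lower CP) uses the LDMA offsets and
 * also carries the RAZWI error reporting, the arbiter watchdog and the
 * protection configuration. The TPC used for the sync stream collective
 * additionally gets the west-south sync manager bases on CP_MSG_BASE 2/3.
 */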
2993 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2994                                 int qman_id, u64 qman_base_addr)
2995 {
2996         struct cpu_dyn_regs *dyn_regs =
2997                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2998         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2999         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3000         u32 tpc_qm_err_cfg, irq_handler_offset;
3001         u32 q_off, tpc_id;
3002
3003         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3004                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3005         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3006                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3007         so_base_en_lo = lower_32_bits(CFG_BASE +
3008                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3009         so_base_en_hi = upper_32_bits(CFG_BASE +
3010                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3011         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3012                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3013         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3014                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3015         so_base_ws_lo = lower_32_bits(CFG_BASE +
3016                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3017         so_base_ws_hi = upper_32_bits(CFG_BASE +
3018                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3019
3020         q_off = tpc_offset + qman_id * 4;
3021
3022         tpc_id = tpc_offset /
3023                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3024
3025         if (qman_id < 4) {
3026                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3027                                         lower_32_bits(qman_base_addr));
3028                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3029                                         upper_32_bits(qman_base_addr));
3030
3031                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3032                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3033                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3034
3035                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3036                                                         QMAN_CPDMA_SIZE_OFFSET);
3037                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3038                                                         QMAN_CPDMA_SRC_OFFSET);
3039                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3040                                                         QMAN_CPDMA_DST_OFFSET);
3041         } else {
3042                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3043                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3044                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3045
3046                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3047                                                         QMAN_LDMA_SIZE_OFFSET);
3048                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3049                                                         QMAN_LDMA_SRC_OFFSET);
3050                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3051                                                         QMAN_LDMA_DST_OFFSET);
3052
3053                 /* Configure RAZWI IRQ */
3054                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3055                 if (hdev->stop_on_err)
3056                         tpc_qm_err_cfg |=
3057                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3058
3059                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3060
3061                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3062                         lower_32_bits(CFG_BASE + irq_handler_offset));
3063                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3064                         upper_32_bits(CFG_BASE + irq_handler_offset));
3065
3066                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3067                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3068                                                                         tpc_id);
3069
3070                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3071                                 QM_ARB_ERR_MSG_EN_MASK);
3072
3073                 /* Increase ARB WDT to support streams architecture */
3074                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3075                                 GAUDI_ARB_WDT_TIMEOUT);
3076
3077                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3078                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3079                                 QMAN_INTERNAL_MAKE_TRUSTED);
3080         }
3081
3082         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3083         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3084         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3085         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3086
3087         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3088         if (tpc_id == 6) {
3089                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3090                                 mtr_base_ws_lo);
3091                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3092                                 mtr_base_ws_hi);
3093                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3094                                 so_base_ws_lo);
3095                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3096                                 so_base_ws_hi);
3097         }
3098 }
3099
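/*
 * gaudi_init_tpc_qmans - initialize the QMANs of all TPC engines.
 *
 * Programs the four upper-CP streams and the lower CP of every TPC,
 * enables each QMAN/TPC channel, sets the sync manager base address per
 * TPC and records the initialized engines in hw_cap_initialized.
 */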
3100 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3101 {
3102         struct gaudi_device *gaudi = hdev->asic_specific;
3103         struct gaudi_internal_qman_info *q;
3104         u64 qman_base_addr;
3105         u32 so_base_hi, tpc_offset = 0;
3106         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3107                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3108         int i, tpc_id, internal_q_index;
3109
3110         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3111                 return;
3112
3113         so_base_hi = upper_32_bits(CFG_BASE +
3114                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3115
3116         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3117                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3118                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3119                                                 tpc_id * QMAN_STREAMS + i;
3120                         q = &gaudi->internal_qmans[internal_q_index];
3121                         qman_base_addr = (u64) q->pq_dma_addr;
3122                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3123                                                 qman_base_addr);
3124
3125                         if (i == 3) {
3126                                 /* Initializing lower CP for TPC QMAN */
3127                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3128
3129                                 /* Enable the QMAN and TPC channel */
3130                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3131                                                 QMAN_TPC_ENABLE);
3132                         }
3133                 }
3134
3135                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3136                                 so_base_hi);
3137
3138                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3139
3140                 gaudi->hw_cap_initialized |=
3141                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3142         }
3143 }
3144
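/*
 * gaudi_init_nic_qman - configure one stream of a NIC QMAN.
 *
 * Every stream gets its PQ, the LDMA offsets and the east-north and
 * west-south sync manager bases. Stream 0 additionally carries the
 * RAZWI error reporting, the arbiter watchdog and the protection
 * configuration.
 */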
3145 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3146                                 int qman_id, u64 qman_base_addr, int nic_id)
3147 {
3148         struct cpu_dyn_regs *dyn_regs =
3149                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3150         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3151         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3152         u32 nic_qm_err_cfg, irq_handler_offset;
3153         u32 q_off;
3154
3155         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3156                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3157         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3158                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3159         so_base_en_lo = lower_32_bits(CFG_BASE +
3160                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3161         so_base_en_hi = upper_32_bits(CFG_BASE +
3162                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3163         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3164                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3165         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3166                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3167         so_base_ws_lo = lower_32_bits(CFG_BASE +
3168                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3169         so_base_ws_hi = upper_32_bits(CFG_BASE +
3170                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3171
3172         q_off = nic_offset + qman_id * 4;
3173
3174         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3175         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3176
3177         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3178         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3179         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3180
3181         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3182                                                         QMAN_LDMA_SIZE_OFFSET);
3183         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3184                                                         QMAN_LDMA_SRC_OFFSET);
3185         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3186                                                         QMAN_LDMA_DST_OFFSET);
3187
3188         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3189         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3190         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3191         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3192
3193         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3194         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3195         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3196         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3197         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3198
3199         if (qman_id == 0) {
3200                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3201                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3202                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3203
3204                 /* Configure RAZWI IRQ */
3205                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3206                 if (hdev->stop_on_err)
3207                         nic_qm_err_cfg |=
3208                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3209
3210                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3211
3212                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3213                         lower_32_bits(CFG_BASE + irq_handler_offset));
3214                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3215                         upper_32_bits(CFG_BASE + irq_handler_offset));
3216
3217                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3218                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3219                                                                         nic_id);
3220
3221                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3222                                 QM_ARB_ERR_MSG_EN_MASK);
3223
3224                 /* Increase ARB WDT to support streams architecture */
3225                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3226                                 GAUDI_ARB_WDT_TIMEOUT);
3227
3228                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3229                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3230                                 QMAN_INTERNAL_MAKE_TRUSTED);
3231         }
3232 }
3233
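/*
 * gaudi_init_nic_qmans - initialize the QMANs of all enabled NIC ports.
 *
 * Ports that are masked out in hdev->nic_ports_mask are skipped. For
 * every remaining port, the streams are programmed from the internal
 * QMAN info, the QMAN is enabled and the port is recorded in
 * hw_cap_initialized.
 */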
3234 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3235 {
3236         struct gaudi_device *gaudi = hdev->asic_specific;
3237         struct gaudi_internal_qman_info *q;
3238         u64 qman_base_addr;
3239         u32 nic_offset = 0;
3240         u32 nic_delta_between_qmans =
3241                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3242         u32 nic_delta_between_nics =
3243                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3244         int i, nic_id, internal_q_index;
3245
3246         if (!hdev->nic_ports_mask)
3247                 return;
3248
3249         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3250                 return;
3251
3252         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3253
3254         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3255                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3256                         nic_offset += nic_delta_between_qmans;
3257                         if (nic_id & 1) {
3258                                 nic_offset -= (nic_delta_between_qmans * 2);
3259                                 nic_offset += nic_delta_between_nics;
3260                         }
3261                         continue;
3262                 }
3263
3264                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3265                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3266                                                 nic_id * QMAN_STREAMS + i;
3267                         q = &gaudi->internal_qmans[internal_q_index];
3268                         qman_base_addr = (u64) q->pq_dma_addr;
3269                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3270                                                 qman_base_addr, nic_id);
3271                 }
3272
3273                 /* Enable the QMAN */
3274                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3275
3276                 nic_offset += nic_delta_between_qmans;
3277                 if (nic_id & 1) {
3278                         nic_offset -= (nic_delta_between_qmans * 2);
3279                         nic_offset += nic_delta_between_nics;
3280                 }
3281
3282                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3283         }
3284 }
3285
3286 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3287 {
3288         struct gaudi_device *gaudi = hdev->asic_specific;
3289
3290         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3291                 return;
3292
3293         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3294         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3295         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3296 }
3297
3298 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3299 {
3300         struct gaudi_device *gaudi = hdev->asic_specific;
3301
3302         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3303                 return;
3304
3305         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3306         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3307         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3308         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3309         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3310 }
3311
3312 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3313 {
3314         struct gaudi_device *gaudi = hdev->asic_specific;
3315
3316         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3317                 return;
3318
3319         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3320         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3321 }
3322
3323 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3324 {
3325         struct gaudi_device *gaudi = hdev->asic_specific;
3326         u32 tpc_offset = 0;
3327         int tpc_id;
3328
3329         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3330                 return;
3331
3332         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3333                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3334                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3335         }
3336 }
3337
3338 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3339 {
3340         struct gaudi_device *gaudi = hdev->asic_specific;
3341         u32 nic_mask, nic_offset = 0;
3342         u32 nic_delta_between_qmans =
3343                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3344         u32 nic_delta_between_nics =
3345                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3346         int nic_id;
3347
3348         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3349                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3350
3351                 if (gaudi->hw_cap_initialized & nic_mask)
3352                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3353
3354                 nic_offset += nic_delta_between_qmans;
3355                 if (nic_id & 1) {
3356                         nic_offset -= (nic_delta_between_qmans * 2);
3357                         nic_offset += nic_delta_between_nics;
3358                 }
3359         }
3360 }
3361
3362 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3363 {
3364         struct gaudi_device *gaudi = hdev->asic_specific;
3365
3366         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3367                 return;
3368
3369         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3370         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3371         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3372         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3373 }
3374
3375 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3376 {
3377         struct gaudi_device *gaudi = hdev->asic_specific;
3378
3379         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3380                 return;
3381
3382         /* Stop CPs of HBM DMA QMANs */
3383
3384         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3385         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3386         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3387         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3388         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3389 }
3390
3391 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3392 {
3393         struct gaudi_device *gaudi = hdev->asic_specific;
3394
3395         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3396                 return;
3397
3398         /* Stop CPs of MME QMANs */
3399         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3400         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3401 }
3402
3403 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3404 {
3405         struct gaudi_device *gaudi = hdev->asic_specific;
3406
3407         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3408                 return;
3409
3410         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3411         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3412         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3416         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3417         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3418 }
3419
3420 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3421 {
3422         struct gaudi_device *gaudi = hdev->asic_specific;
3423
3424         /* Stop upper CPs of QMANs */
3425
3426         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3427                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3428                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3429                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3430                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3431
3432         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3433                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3434                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3435                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3436                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3437
3438         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3439                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3440                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3441                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3442                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3443
3444         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3445                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3446                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3447                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3448                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3449
3450         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3451                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3452                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3453                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3454                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3455
3456         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3457                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3458                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3459                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3460                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3461
3462         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3463                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3464                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3465                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3466                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3467
3468         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3469                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3470                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3471                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3472                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3473
3474         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3475                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3476                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3477                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3478                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3479
3480         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3481                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3482                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3483                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3484                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3485 }
3486
3487 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3488 {
3489         struct gaudi_device *gaudi = hdev->asic_specific;
3490
3491         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3492                 return;
3493
3494         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3495         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3496         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3497 }
3498
3499 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3500 {
3501         struct gaudi_device *gaudi = hdev->asic_specific;
3502
3503         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3504                 return;
3505
3506         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3507         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3508         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3509         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3510         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3511 }
3512
3513 static void gaudi_mme_stall(struct hl_device *hdev)
3514 {
3515         struct gaudi_device *gaudi = hdev->asic_specific;
3516
3517         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3518                 return;
3519
3520         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3521         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3522         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3523         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3524         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3525         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3526         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3527         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3528         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3529         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3530         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3531         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3532         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3533         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3534         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3535         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3536         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3537 }
3538
3539 static void gaudi_tpc_stall(struct hl_device *hdev)
3540 {
3541         struct gaudi_device *gaudi = hdev->asic_specific;
3542
3543         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3544                 return;
3545
3546         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3547         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3552         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3553         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3554 }
3555
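/*
 * gaudi_set_clock_gating - enable clock gating per the configured mask.
 *
 * Skipped entirely during a debug session or when FW security is
 * enabled. Otherwise each DMA, MME and TPC QMAN is gated according to
 * hdev->clock_gating_mask, except DMA5 which is left un-gated because
 * the GC submits work through its upper CP.
 */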
3556 static void gaudi_set_clock_gating(struct hl_device *hdev)
3557 {
3558         struct gaudi_device *gaudi = hdev->asic_specific;
3559         u32 qman_offset;
3560         bool enable;
3561         int i;
3562
3563         /* If we are in a debug session, don't enable clock gating as it may
3564          * interfere
3565          */
3566         if (hdev->in_debug)
3567                 return;
3568
3569         if (hdev->asic_prop.fw_security_enabled)
3570                 return;
3571
3572         for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3573                 enable = !!(hdev->clock_gating_mask &
3574                                 (BIT_ULL(gaudi_dma_assignment[i])));
3575
3576                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3577                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3578                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3579                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3580                                 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3581         }
3582
3583         for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3584                 enable = !!(hdev->clock_gating_mask &
3585                                 (BIT_ULL(gaudi_dma_assignment[i])));
3586
3587                 /* The GC sends work to the DMA engine through the upper CP in
3588                  * DMA5, so we must not enable clock gating in that DMA
3589                  */
3590                 if (i == GAUDI_HBM_DMA_4)
3591                         enable = 0;
3592
3593                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3594                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3595                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3596                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3597                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3598         }
3599
3600         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3601         WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3602         WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3603
3604         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3605         WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3606         WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3607
3608         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3609                 enable = !!(hdev->clock_gating_mask &
3610                                 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3611
3612                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3613                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3614                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3615                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3616
3617                 qman_offset += TPC_QMAN_OFFSET;
3618         }
3619
3620         gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3621 }
3622
3623 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3624 {
3625         struct gaudi_device *gaudi = hdev->asic_specific;
3626         u32 qman_offset;
3627         int i;
3628
3629         if (hdev->asic_prop.fw_security_enabled)
3630                 return;
3631
3632         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3633                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3634                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3635
3636                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3637         }
3638
3639         WREG32(mmMME0_QM_CGM_CFG, 0);
3640         WREG32(mmMME0_QM_CGM_CFG1, 0);
3641         WREG32(mmMME2_QM_CGM_CFG, 0);
3642         WREG32(mmMME2_QM_CGM_CFG1, 0);
3643
3644         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3645                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3646                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3647
3648                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3649         }
3650
3651         gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3652 }
3653
3654 static void gaudi_enable_timestamp(struct hl_device *hdev)
3655 {
3656         /* Disable the timestamp counter */
3657         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3658
3659         /* Zero the lower/upper parts of the 64-bit counter */
3660         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3661         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3662
3663         /* Enable the counter */
3664         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3665 }
3666
3667 static void gaudi_disable_timestamp(struct hl_device *hdev)
3668 {
3669         /* Disable the timestamp counter */
3670         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3671 }
3672
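/*
 * gaudi_halt_engines - quiesce all compute engines before a reset.
 *
 * The sequence is: stop the QMAN CPs, disable clock gating, wait, stall
 * the engines themselves, wait again, then disable the QMANs, the
 * timestamp counter and finally the MSI interrupts.
 */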
3673 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3674 {
3675         u32 wait_timeout_ms;
3676
3677         dev_info(hdev->dev,
3678                 "Halting compute engines and disabling interrupts\n");
3679
3680         if (hdev->pldm)
3681                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3682         else
3683                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3684
3685         gaudi_stop_nic_qmans(hdev);
3686         gaudi_stop_mme_qmans(hdev);
3687         gaudi_stop_tpc_qmans(hdev);
3688         gaudi_stop_hbm_dma_qmans(hdev);
3689         gaudi_stop_pci_dma_qmans(hdev);
3690
3691         hdev->asic_funcs->disable_clock_gating(hdev);
3692
3693         msleep(wait_timeout_ms);
3694
3695         gaudi_pci_dma_stall(hdev);
3696         gaudi_hbm_dma_stall(hdev);
3697         gaudi_tpc_stall(hdev);
3698         gaudi_mme_stall(hdev);
3699
3700         msleep(wait_timeout_ms);
3701
3702         gaudi_disable_nic_qmans(hdev);
3703         gaudi_disable_mme_qmans(hdev);
3704         gaudi_disable_tpc_qmans(hdev);
3705         gaudi_disable_hbm_dma_qmans(hdev);
3706         gaudi_disable_pci_dma_qmans(hdev);
3707
3708         gaudi_disable_timestamp(hdev);
3709
3710         gaudi_disable_msi(hdev);
3711 }
3712
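/*
 * gaudi_mmu_init - initialize the device MMU.
 *
 * Programs the hop0 address of every ASID, sets the STLB cache
 * management page, invalidates the MMU cache and enables the MMU with a
 * hop configuration that depends on the huge-page optimization.
 */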
3713 static int gaudi_mmu_init(struct hl_device *hdev)
3714 {
3715         struct asic_fixed_properties *prop = &hdev->asic_prop;
3716         struct gaudi_device *gaudi = hdev->asic_specific;
3717         u64 hop0_addr;
3718         int rc, i;
3719
3720         if (!hdev->mmu_enable)
3721                 return 0;
3722
3723         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3724                 return 0;
3725
3726         for (i = 0 ; i < prop->max_asid ; i++) {
3727                 hop0_addr = prop->mmu_pgt_addr +
3728                                 (i * prop->mmu_hop_table_size);
3729
3730                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3731                 if (rc) {
3732                         dev_err(hdev->dev,
3733                                 "failed to set hop0 addr for asid %d\n", i);
3734                         goto err;
3735                 }
3736         }
3737
3738         /* init MMU cache management page */
3739         WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3740         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3741
3742         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3743
3744         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3745         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3746
3747         WREG32(mmSTLB_HOP_CONFIGURATION,
3748                         hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3749
3750         /*
3751          * The H/W expects the first PI after init to be 1. After wraparound
3752          * we'll write 0.
3753          */
3754         gaudi->mmu_cache_inv_pi = 1;
3755
3756         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3757
3758         return 0;
3759
3760 err:
3761         return rc;
3762 }
3763
3764 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3765 {
3766         void __iomem *dst;
3767
3768         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3769
3770         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3771 }
3772
3773 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3774 {
3775         void __iomem *dst;
3776
3777         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3778
3779         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3780 }
3781
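/*
 * Prepare the dynamic (COMMS-based) F/W loader: seed the communication
 * registers with hard-coded defaults until the first descriptor is read
 * from the F/W, and set the boot-loader wait timeout.
 */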
3782 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3783 {
3784         struct dynamic_fw_load_mgr *dynamic_loader;
3785         struct cpu_dyn_regs *dyn_regs;
3786
3787         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3788
3789         /*
3790          * Here we set initial values for a few specific dynamic registers:
3791          * before the first descriptor is read from the FW, these values must
3792          * be hard-coded. In later stages of the protocol they are updated
3793          * automatically from the FW descriptor, so the data there is always
3794          * up-to-date.
3795          */
3796         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3797         dyn_regs->kmd_msg_to_cpu =
3798                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3799         dyn_regs->cpu_cmd_status_to_host =
3800                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3801
3802         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3803 }
3804
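/*
 * Prepare the static F/W loader: fill in the fixed register offsets and
 * version-string locations used by the legacy (pre-COMMS) boot protocol.
 */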
3805 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3806 {
3807         struct static_fw_load_mgr *static_loader;
3808
3809         static_loader = &hdev->fw_loader.static_loader;
3810
3811         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3812         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3813         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3814         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3815         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3816         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3817         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3818         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3819         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3820         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3821         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3822         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3823         static_loader->cpu_reset_wait_msec = hdev->pldm ?
3824                         GAUDI_PLDM_RESET_WAIT_MSEC :
3825                         GAUDI_CPU_RESET_WAIT_MSEC;
3826 }
3827
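/*
 * Fill the common F/W loader fields (image names, timeouts, BAR ids) and
 * then initialize either the dynamic or the static loader according to
 * the ASIC properties.
 */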
3828 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3829 {
3830         struct asic_fixed_properties *prop = &hdev->asic_prop;
3831         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3832
3833         /* fill common fields */
3834         fw_loader->linux_loaded = false;
3835         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3836         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3837         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3838         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3839         fw_loader->skip_bmc = !hdev->bmc_enable;
3840         fw_loader->sram_bar_id = SRAM_BAR_ID;
3841         fw_loader->dram_bar_id = HBM_BAR_ID;
3842
3843         if (prop->dynamic_fw_load)
3844                 gaudi_init_dynamic_firmware_loader(hdev);
3845         else
3846                 gaudi_init_static_firmware_loader(hdev);
3847 }
3848
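/*
 * Initialize the embedded device CPU: extend its PCI address space
 * (when F/W security is disabled) and run the common F/W CPU init
 * sequence. Skipped if preboot loading is disabled or already done.
 */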
3849 static int gaudi_init_cpu(struct hl_device *hdev)
3850 {
3851         struct gaudi_device *gaudi = hdev->asic_specific;
3852         int rc;
3853
3854         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3855                 return 0;
3856
3857         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3858                 return 0;
3859
3860         /*
3861          * The device CPU works with 40-bit addresses.
3862          * This register sets the extension to 50 bits.
3863          */
3864         if (!hdev->asic_prop.fw_security_enabled)
3865                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3866
3867         rc = hl_fw_init_cpu(hdev);
3868
3869         if (rc)
3870                 return rc;
3871
3872         gaudi->hw_cap_initialized |= HW_CAP_CPU;
3873
3874         return 0;
3875 }
3876
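/*
 * Hand the bus addresses of the PQ, EQ and CPU-accessible memory to the
 * device CPU, signal it through the PI-update interrupt and wait until
 * it reports PQ_INIT_STATUS_READY_FOR_HOST.
 */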
3877 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3878 {
3879         struct cpu_dyn_regs *dyn_regs =
3880                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3881         struct asic_fixed_properties *prop = &hdev->asic_prop;
3882         struct gaudi_device *gaudi = hdev->asic_specific;
3883         u32 status, irq_handler_offset;
3884         struct hl_eq *eq;
3885         struct hl_hw_queue *cpu_pq =
3886                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3887         int err;
3888
3889         if (!hdev->cpu_queues_enable)
3890                 return 0;
3891
3892         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3893                 return 0;
3894
3895         eq = &hdev->event_queue;
3896
3897         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3898         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3899
3900         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3901         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3902
3903         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3904                         lower_32_bits(hdev->cpu_accessible_dma_address));
3905         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3906                         upper_32_bits(hdev->cpu_accessible_dma_address));
3907
3908         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3909         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3910         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3911
3912         /* Used for EQ CI */
3913         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3914
3915         WREG32(mmCPU_IF_PF_PQ_PI, 0);
3916
3917         if (gaudi->multi_msi_mode)
3918                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3919         else
3920                 WREG32(mmCPU_IF_QUEUE_INIT,
3921                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3922
3923         irq_handler_offset = prop->gic_interrupts_enable ?
3924                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3925                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3926
3927         WREG32(irq_handler_offset,
3928                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3929
3930         err = hl_poll_timeout(
3931                 hdev,
3932                 mmCPU_IF_QUEUE_INIT,
3933                 status,
3934                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3935                 1000,
3936                 cpu_timeout);
3937
3938         if (err) {
3939                 dev_err(hdev->dev,
3940                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3941                 return -EIO;
3942         }
3943
3944         /* update FW application security bits */
3945         if (prop->fw_cpu_boot_dev_sts0_valid)
3946                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3947         if (prop->fw_cpu_boot_dev_sts1_valid)
3948                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3949
3950         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3951         return 0;
3952 }
3953
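/*
 * Early H/W setup done before any engine is touched: verify the device
 * responds, secure the LBW access through the PCI BARs (when F/W
 * security is disabled) and mark the H/W state as dirty.
 */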
3954 static void gaudi_pre_hw_init(struct hl_device *hdev)
3955 {
3956         /* Perform a read from the device to make sure the device is up */
3957         RREG32(mmHW_STATE);
3958
3959         if (!hdev->asic_prop.fw_security_enabled) {
3960                 /* Set the access through PCI bars (Linux driver only) as
3961                  * secured
3962                  */
3963                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3964                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3965                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3966
3967                 /* Perform a read to flush the pending writes and ensure the
3968                  * configuration was set in the device
3969                  */
3970                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3971         }
3972
3973         /*
3974          * Let's mark in the H/W that we have reached this point. We check
3975          * this value in the reset_before_init function to understand whether
3976          * we need to reset the chip before doing H/W init. This register is
3977          * cleared by the H/W upon H/W reset
3978          */
3979         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3980 }
3981
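/*
 * Full H/W initialization flow: map the HBM BAR, bring up the device
 * CPU, initialize the scramblers, golden registers, MMU, security and
 * all QMANs, then enable MSI and the CPU queues.
 */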
3982 static int gaudi_hw_init(struct hl_device *hdev)
3983 {
3984         struct gaudi_device *gaudi = hdev->asic_specific;
3985         int rc;
3986
3987         gaudi_pre_hw_init(hdev);
3988
3989         /* If the iATU is configured by FW, the HBM BAR ALWAYS points to
3990          * DRAM_PHYS_BASE. So we record it here, and if anyone later tries to
3991          * move it to a different address, there will be an error
3992          */
3993         if (hdev->asic_prop.iatu_done_by_fw)
3994                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3995
3996         /*
3997          * Before pushing u-boot/linux to the device, we need to set the HBM
3998          * BAR to the base address of the DRAM
3999          */
4000         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4001                 dev_err(hdev->dev,
4002                         "failed to map HBM bar to DRAM base address\n");
4003                 return -EIO;
4004         }
4005
4006         rc = gaudi_init_cpu(hdev);
4007         if (rc) {
4008                 dev_err(hdev->dev, "failed to initialize CPU\n");
4009                 return rc;
4010         }
4011
4012         /* In case clock gating was enabled in preboot, we need to disable it
4013          * here before touching the MME/TPC registers.
4014          * There is no need to take the clock gating mutex because when this
4015          * function runs, no other relevant code can run
4016          */
4017         hdev->asic_funcs->disable_clock_gating(hdev);
4018
4019         /* SRAM scrambler must be initialized after CPU is running from HBM */
4020         gaudi_init_scrambler_sram(hdev);
4021
4022         /* This is here just in case we are working without CPU */
4023         gaudi_init_scrambler_hbm(hdev);
4024
4025         gaudi_init_golden_registers(hdev);
4026
4027         rc = gaudi_mmu_init(hdev);
4028         if (rc)
4029                 return rc;
4030
4031         gaudi_init_security(hdev);
4032
4033         gaudi_init_pci_dma_qmans(hdev);
4034
4035         gaudi_init_hbm_dma_qmans(hdev);
4036
4037         gaudi_init_mme_qmans(hdev);
4038
4039         gaudi_init_tpc_qmans(hdev);
4040
4041         gaudi_init_nic_qmans(hdev);
4042
4043         hdev->asic_funcs->set_clock_gating(hdev);
4044
4045         gaudi_enable_timestamp(hdev);
4046
4047         /* MSI must be enabled before CPU queues and NIC are initialized */
4048         rc = gaudi_enable_msi(hdev);
4049         if (rc)
4050                 goto disable_queues;
4051
4052         /* must be called after MSI was enabled */
4053         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4054         if (rc) {
4055                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4056                         rc);
4057                 goto disable_msi;
4058         }
4059
4060         /* Perform a read from the device to flush all the configuration */
4061         RREG32(mmHW_STATE);
4062
4063         return 0;
4064
4065 disable_msi:
4066         gaudi_disable_msi(hdev);
4067 disable_queues:
4068         gaudi_disable_mme_qmans(hdev);
4069         gaudi_disable_pci_dma_qmans(hdev);
4070
4071         return rc;
4072 }
4073
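/*
 * Perform a hard reset of the device. Depending on the F/W security and
 * capabilities, either the driver configures the reset registers and
 * asserts the reset itself or it asks the F/W to do so, then waits for
 * the reset to complete and clears the relevant H/W capability bits.
 */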
4074 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
4075 {
4076         struct cpu_dyn_regs *dyn_regs =
4077                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4078         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4079         struct gaudi_device *gaudi = hdev->asic_specific;
4080         bool driver_performs_reset;
4081
4082         if (!hard_reset) {
4083                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4084                 return;
4085         }
4086
4087         if (hdev->pldm) {
4088                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4089                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4090         } else {
4091                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4092                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4093         }
4094
4095         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4096                                         !hdev->asic_prop.hard_reset_done_by_fw);
4097
4098         /* Set the device to handle FLR by H/W, as we are going to put the
4099          * device CPU in halt mode
4100          */
4101         if (driver_performs_reset)
4102                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4103                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4104
4105         /* If Linux is loaded on the device CPU we need to communicate with it
4106          * via the GIC. Otherwise, we use COMMS, or the MSG_TO_CPU registers
4107          * in case of old F/Ws
4108          */
4109         if (hdev->fw_loader.linux_loaded) {
4110                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4111                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4112                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4113
4114                 WREG32(irq_handler_offset,
4115                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4116         } else {
4117                 if (hdev->asic_prop.hard_reset_done_by_fw)
4118                         hl_fw_ask_hard_reset_without_linux(hdev);
4119                 else
4120                         hl_fw_ask_halt_machine_without_linux(hdev);
4121         }
4122
4123         if (driver_performs_reset) {
4124
4125                 /* Configure the reset registers. Must be done as early as
4126                  * possible in case we fail during H/W initialization
4127                  */
4128                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4129                                                 (CFG_RST_H_DMA_MASK |
4130                                                 CFG_RST_H_MME_MASK |
4131                                                 CFG_RST_H_SM_MASK |
4132                                                 CFG_RST_H_TPC_7_MASK));
4133
4134                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4135
4136                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4137                                                 (CFG_RST_H_HBM_MASK |
4138                                                 CFG_RST_H_TPC_7_MASK |
4139                                                 CFG_RST_H_NIC_MASK |
4140                                                 CFG_RST_H_SM_MASK |
4141                                                 CFG_RST_H_DMA_MASK |
4142                                                 CFG_RST_H_MME_MASK |
4143                                                 CFG_RST_H_CPU_MASK |
4144                                                 CFG_RST_H_MMU_MASK));
4145
4146                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4147                                                 (CFG_RST_L_IF_MASK |
4148                                                 CFG_RST_L_PSOC_MASK |
4149                                                 CFG_RST_L_TPC_MASK));
4150
4151                 msleep(cpu_timeout_ms);
4152
4153                 /* Tell ASIC not to re-initialize PCIe */
4154                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4155
4156                 /* Restart BTL/BLR upon hard-reset */
4157                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4158
4159                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4160                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4161
4162                 dev_info(hdev->dev,
4163                         "Issued HARD reset command, going to wait %dms\n",
4164                         reset_timeout_ms);
4165         } else {
4166                 dev_info(hdev->dev,
4167                         "Firmware performs HARD reset, going to wait %dms\n",
4168                         reset_timeout_ms);
4169         }
4170
4171         /*
4172          * After hard reset, we can't poll the BTM_FSM register because the PSOC
4173          * itself is in reset. We need to wait until the reset is deasserted.
4174          */
4175         msleep(reset_timeout_ms);
4176
4177         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4178         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4179                 dev_err(hdev->dev,
4180                         "Timeout while waiting for device to reset 0x%x\n",
4181                         status);
4182
4183         if (gaudi) {
4184                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4185                                 HW_CAP_HBM | HW_CAP_PCI_DMA |
4186                                 HW_CAP_MME | HW_CAP_TPC_MASK |
4187                                 HW_CAP_HBM_DMA | HW_CAP_PLL |
4188                                 HW_CAP_NIC_MASK | HW_CAP_MMU |
4189                                 HW_CAP_SRAM_SCRAMBLER |
4190                                 HW_CAP_HBM_SCRAMBLER |
4191                                 HW_CAP_CLK_GATE);
4192
4193                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4194
4195                 hdev->device_cpu_is_halted = false;
4196         }
4197 }
4198
4199 static int gaudi_suspend(struct hl_device *hdev)
4200 {
4201         int rc;
4202
4203         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4204         if (rc)
4205                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4206
4207         return rc;
4208 }
4209
4210 static int gaudi_resume(struct hl_device *hdev)
4211 {
4212         return gaudi_init_iatu(hdev);
4213 }
4214
4215 static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4216                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4217 {
4218         int rc;
4219
4220         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4221                         VM_DONTCOPY | VM_NORESERVE;
4222
4223         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4224                                 (dma_addr - HOST_PHYS_BASE), size);
4225         if (rc)
4226                 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4227
4228         return rc;
4229 }
4230
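/*
 * Write the new PI value to the doorbell register of the given H/W
 * queue. For the CPU PQ, also raise the PI-update interrupt so the
 * device CPU notices the new submission.
 */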
4231 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4232 {
4233         struct cpu_dyn_regs *dyn_regs =
4234                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4235         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4236         struct gaudi_device *gaudi = hdev->asic_specific;
4237         bool invalid_queue = false;
4238         int dma_id;
4239
4240         switch (hw_queue_id) {
4241         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4242                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4243                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4244                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4245                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4246                 break;
4247
4248         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4249                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4250                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4251                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4252                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4253                 break;
4254
4255         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4256                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4257                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4258                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4259                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4260                 break;
4261
4262         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4263                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4264                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4265                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4266                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4267                 break;
4268
4269         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4270                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4271                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4272                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4273                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4274                 break;
4275
4276         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4277                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4278                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4279                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4280                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4281                 break;
4282
4283         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4284                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4285                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4286                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4287                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4288                 break;
4289
4290         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4291                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4292                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4293                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4294                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4295                 break;
4296
4297         case GAUDI_QUEUE_ID_CPU_PQ:
4298                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4299                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4300                 else
4301                         invalid_queue = true;
4302                 break;
4303
4304         case GAUDI_QUEUE_ID_MME_0_0:
4305                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4306                 break;
4307
4308         case GAUDI_QUEUE_ID_MME_0_1:
4309                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4310                 break;
4311
4312         case GAUDI_QUEUE_ID_MME_0_2:
4313                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4314                 break;
4315
4316         case GAUDI_QUEUE_ID_MME_0_3:
4317                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4318                 break;
4319
4320         case GAUDI_QUEUE_ID_MME_1_0:
4321                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4322                 break;
4323
4324         case GAUDI_QUEUE_ID_MME_1_1:
4325                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4326                 break;
4327
4328         case GAUDI_QUEUE_ID_MME_1_2:
4329                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4330                 break;
4331
4332         case GAUDI_QUEUE_ID_MME_1_3:
4333                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4334                 break;
4335
4336         case GAUDI_QUEUE_ID_TPC_0_0:
4337                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4338                 break;
4339
4340         case GAUDI_QUEUE_ID_TPC_0_1:
4341                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4342                 break;
4343
4344         case GAUDI_QUEUE_ID_TPC_0_2:
4345                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4346                 break;
4347
4348         case GAUDI_QUEUE_ID_TPC_0_3:
4349                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4350                 break;
4351
4352         case GAUDI_QUEUE_ID_TPC_1_0:
4353                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4354                 break;
4355
4356         case GAUDI_QUEUE_ID_TPC_1_1:
4357                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4358                 break;
4359
4360         case GAUDI_QUEUE_ID_TPC_1_2:
4361                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4362                 break;
4363
4364         case GAUDI_QUEUE_ID_TPC_1_3:
4365                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4366                 break;
4367
4368         case GAUDI_QUEUE_ID_TPC_2_0:
4369                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4370                 break;
4371
4372         case GAUDI_QUEUE_ID_TPC_2_1:
4373                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4374                 break;
4375
4376         case GAUDI_QUEUE_ID_TPC_2_2:
4377                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4378                 break;
4379
4380         case GAUDI_QUEUE_ID_TPC_2_3:
4381                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4382                 break;
4383
4384         case GAUDI_QUEUE_ID_TPC_3_0:
4385                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4386                 break;
4387
4388         case GAUDI_QUEUE_ID_TPC_3_1:
4389                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4390                 break;
4391
4392         case GAUDI_QUEUE_ID_TPC_3_2:
4393                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4394                 break;
4395
4396         case GAUDI_QUEUE_ID_TPC_3_3:
4397                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4398                 break;
4399
4400         case GAUDI_QUEUE_ID_TPC_4_0:
4401                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4402                 break;
4403
4404         case GAUDI_QUEUE_ID_TPC_4_1:
4405                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4406                 break;
4407
4408         case GAUDI_QUEUE_ID_TPC_4_2:
4409                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4410                 break;
4411
4412         case GAUDI_QUEUE_ID_TPC_4_3:
4413                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4414                 break;
4415
4416         case GAUDI_QUEUE_ID_TPC_5_0:
4417                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4418                 break;
4419
4420         case GAUDI_QUEUE_ID_TPC_5_1:
4421                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4422                 break;
4423
4424         case GAUDI_QUEUE_ID_TPC_5_2:
4425                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4426                 break;
4427
4428         case GAUDI_QUEUE_ID_TPC_5_3:
4429                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4430                 break;
4431
4432         case GAUDI_QUEUE_ID_TPC_6_0:
4433                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4434                 break;
4435
4436         case GAUDI_QUEUE_ID_TPC_6_1:
4437                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4438                 break;
4439
4440         case GAUDI_QUEUE_ID_TPC_6_2:
4441                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4442                 break;
4443
4444         case GAUDI_QUEUE_ID_TPC_6_3:
4445                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4446                 break;
4447
4448         case GAUDI_QUEUE_ID_TPC_7_0:
4449                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4450                 break;
4451
4452         case GAUDI_QUEUE_ID_TPC_7_1:
4453                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4454                 break;
4455
4456         case GAUDI_QUEUE_ID_TPC_7_2:
4457                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4458                 break;
4459
4460         case GAUDI_QUEUE_ID_TPC_7_3:
4461                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4462                 break;
4463
4464         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4465                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4466                         invalid_queue = true;
4467
4468                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4469                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4470                 break;
4471
4472         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4473                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4474                         invalid_queue = true;
4475
4476                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4477                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4478                 break;
4479
4480         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4481                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4482                         invalid_queue = true;
4483
4484                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4485                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4486                 break;
4487
4488         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4489                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4490                         invalid_queue = true;
4491
4492                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4493                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4494                 break;
4495
4496         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4497                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4498                         invalid_queue = true;
4499
4500                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4501                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4502                 break;
4503
4504         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4505                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4506                         invalid_queue = true;
4507
4508                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4509                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4510                 break;
4511
4512         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4513                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4514                         invalid_queue = true;
4515
4516                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4517                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4518                 break;
4519
4520         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4521                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4522                         invalid_queue = true;
4523
4524                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4525                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4526                 break;
4527
4528         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4529                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4530                         invalid_queue = true;
4531
4532                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4533                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4534                 break;
4535
4536         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4537                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4538                         invalid_queue = true;
4539
4540                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4541                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4542                 break;
4543
4544         default:
4545                 invalid_queue = true;
4546         }
4547
4548         if (invalid_queue) {
4549                 /* Should never get here */
4550                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4551                         hw_queue_id);
4552                 return;
4553         }
4554
4555         db_value = pi;
4556
4557         /* ring the doorbell */
4558         WREG32(db_reg_offset, db_value);
4559
4560         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4561                 /* make sure device CPU will read latest data from host */
4562                 mb();
4563
4564                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4565                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4566                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4567
4568                 WREG32(irq_handler_offset,
4569                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4570         }
4571 }
4572
4573 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4574                                 struct hl_bd *bd)
4575 {
4576         __le64 *pbd = (__le64 *) bd;
4577
4578         /* The QMANs are in host memory, so a simple copy suffices */
4579         pqe[0] = pbd[0];
4580         pqe[1] = pbd[1];
4581 }
4582
4583 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4584                                         dma_addr_t *dma_handle, gfp_t flags)
4585 {
4586         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4587                                                 dma_handle, flags);
4588
4589         /* Shift to the device's base physical address of host memory */
4590         if (kernel_addr)
4591                 *dma_handle += HOST_PHYS_BASE;
4592
4593         return kernel_addr;
4594 }
4595
4596 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4597                 void *cpu_addr, dma_addr_t dma_handle)
4598 {
4599         /* Cancel the device's base physical address of host memory */
4600         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4601
4602         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4603 }
4604
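/*
 * Scrub the user HBM range by issuing up to 2GB memset transfers on all
 * DMA channels in parallel, then polling each channel until it is no
 * longer busy.
 */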
4605 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4606 {
4607         struct asic_fixed_properties *prop = &hdev->asic_prop;
4608         u64  cur_addr = DRAM_BASE_ADDR_USER;
4609         u32 val;
4610         u32 chunk_size;
4611         int rc, dma_id;
4612
4613         while (cur_addr < prop->dram_end_address) {
4614                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4615                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4616
4617                         chunk_size =
4618                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4619
4620                         dev_dbg(hdev->dev,
4621                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4622                                 cur_addr, cur_addr + chunk_size);
4623
4624                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
4625                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
4626                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4627                                                 lower_32_bits(cur_addr));
4628                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4629                                                 upper_32_bits(cur_addr));
4630                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4631                                         chunk_size);
4632                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4633                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4634                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4635
4636                         cur_addr += chunk_size;
4637
4638                         if (cur_addr == prop->dram_end_address)
4639                                 break;
4640                 }
4641
4642                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4643                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4644
4645                         rc = hl_poll_timeout(
4646                                 hdev,
4647                                 mmDMA0_CORE_STS0 + dma_offset,
4648                                 val,
4649                                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4650                                 1000,
4651                                 HBM_SCRUBBING_TIMEOUT_US);
4652
4653                         if (rc) {
4654                                 dev_err(hdev->dev,
4655                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4656                                         dma_id);
4657                                 return -EIO;
4658                         }
4659                 }
4660         }
4661
4662         return 0;
4663 }
4664
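/*
 * Scrub device memory if memory scrubbing is enabled. When called with
 * no specific range (addr == 0 && size == 0), wait for the device to go
 * idle and then scrub both SRAM and HBM.
 */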
4665 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4666 {
4667         struct asic_fixed_properties *prop = &hdev->asic_prop;
4668         struct gaudi_device *gaudi = hdev->asic_specific;
4669         int rc = 0;
4670         u64 val = 0;
4671
4672         if (!hdev->memory_scrub)
4673                 return 0;
4674
4675         if (!addr && !size) {
4676                 /* Wait till device is idle */
4677                 rc = hl_poll_timeout(
4678                                 hdev,
4679                                 mmDMA0_CORE_STS0/* dummy */,
4680                                 val/* dummy */,
4681                                 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4682                                                 0, NULL)),
4683                                                 1000,
4684                                                 HBM_SCRUBBING_TIMEOUT_US);
4685                 if (rc) {
4686                         dev_err(hdev->dev, "waiting for idle timeout\n");
4687                         return -EIO;
4688                 }
4689
4690                 /* Scrub SRAM */
4691                 addr = prop->sram_user_base_address;
4692                 size = hdev->pldm ? 0x10000 :
4693                                 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4694                 val = 0x7777777777777777ull;
4695
4696                 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4697                 if (rc) {
4698                         dev_err(hdev->dev,
4699                                 "Failed to clear SRAM in mem scrub all\n");
4700                         return rc;
4701                 }
4702
4703                 mutex_lock(&gaudi->clk_gate_mutex);
4704                 hdev->asic_funcs->disable_clock_gating(hdev);
4705
4706                 /* Scrub HBM using all DMA channels in parallel */
4707                 rc = gaudi_hbm_scrubbing(hdev);
4708                 if (rc)
4709                         dev_err(hdev->dev,
4710                                 "Failed to clear HBM in mem scrub all\n");
4711
4712                 hdev->asic_funcs->set_clock_gating(hdev);
4713                 mutex_unlock(&gaudi->clk_gate_mutex);
4714         }
4715
4716         return rc;
4717 }
4718
4719 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4720                                 u32 queue_id, dma_addr_t *dma_handle,
4721                                 u16 *queue_len)
4722 {
4723         struct gaudi_device *gaudi = hdev->asic_specific;
4724         struct gaudi_internal_qman_info *q;
4725
4726         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4727                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4728                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4729                 return NULL;
4730         }
4731
4732         q = &gaudi->internal_qmans[queue_id];
4733         *dma_handle = q->pq_dma_addr;
4734         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4735
4736         return q->pq_kernel_addr;
4737 }
4738
4739 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4740                                 u16 len, u32 timeout, u64 *result)
4741 {
4742         struct gaudi_device *gaudi = hdev->asic_specific;
4743
4744         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4745                 if (result)
4746                         *result = 0;
4747                 return 0;
4748         }
4749
4750         if (!timeout)
4751                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4752
4753         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4754                                                 timeout, result);
4755 }
4756
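/*
 * Sanity-test an external H/W queue: submit a MSG_PROT packet that
 * writes a known fence value to host memory and poll that location
 * until the value arrives or the timeout expires.
 */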
4757 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4758 {
4759         struct packet_msg_prot *fence_pkt;
4760         dma_addr_t pkt_dma_addr;
4761         u32 fence_val, tmp, timeout_usec;
4762         dma_addr_t fence_dma_addr;
4763         u32 *fence_ptr;
4764         int rc;
4765
4766         if (hdev->pldm)
4767                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4768         else
4769                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4770
4771         fence_val = GAUDI_QMAN0_FENCE_VAL;
4772
4773         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4774                                                         &fence_dma_addr);
4775         if (!fence_ptr) {
4776                 dev_err(hdev->dev,
4777                         "Failed to allocate memory for H/W queue %d testing\n",
4778                         hw_queue_id);
4779                 return -ENOMEM;
4780         }
4781
4782         *fence_ptr = 0;
4783
4784         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4785                                         sizeof(struct packet_msg_prot),
4786                                         GFP_KERNEL, &pkt_dma_addr);
4787         if (!fence_pkt) {
4788                 dev_err(hdev->dev,
4789                         "Failed to allocate packet for H/W queue %d testing\n",
4790                         hw_queue_id);
4791                 rc = -ENOMEM;
4792                 goto free_fence_ptr;
4793         }
4794
4795         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4796         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4797         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4798
4799         fence_pkt->ctl = cpu_to_le32(tmp);
4800         fence_pkt->value = cpu_to_le32(fence_val);
4801         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4802
4803         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4804                                         sizeof(struct packet_msg_prot),
4805                                         pkt_dma_addr);
4806         if (rc) {
4807                 dev_err(hdev->dev,
4808                         "Failed to send fence packet to H/W queue %d\n",
4809                         hw_queue_id);
4810                 goto free_pkt;
4811         }
4812
4813         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4814                                         1000, timeout_usec, true);
4815
4816         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4817
4818         if (rc == -ETIMEDOUT) {
4819                 dev_err(hdev->dev,
4820                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4821                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4822                 rc = -EIO;
4823         }
4824
4825 free_pkt:
4826         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4827                                         pkt_dma_addr);
4828 free_fence_ptr:
4829         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4830                                         fence_dma_addr);
4831         return rc;
4832 }
4833
4834 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4835 {
4836         struct gaudi_device *gaudi = hdev->asic_specific;
4837
4838         /*
4839          * Check the capability here because send_cpu_message() won't update
4840          * the result value if the capability is not set
4841          */
4842         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4843                 return 0;
4844
4845         return hl_fw_test_cpu_queue(hdev);
4846 }
4847
4848 static int gaudi_test_queues(struct hl_device *hdev)
4849 {
4850         int i, rc, ret_val = 0;
4851
4852         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4853                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4854                         rc = gaudi_test_queue(hdev, i);
4855                         if (rc)
4856                                 ret_val = -EINVAL;
4857                 }
4858         }
4859
4860         rc = gaudi_test_cpu_queue(hdev);
4861         if (rc)
4862                 ret_val = -EINVAL;
4863
4864         return ret_val;
4865 }
4866
4867 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4868                 gfp_t mem_flags, dma_addr_t *dma_handle)
4869 {
4870         void *kernel_addr;
4871
4872         if (size > GAUDI_DMA_POOL_BLK_SIZE)
4873                 return NULL;
4874
4875         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4876
4877         /* Shift to the device's base physical address of host memory */
4878         if (kernel_addr)
4879                 *dma_handle += HOST_PHYS_BASE;
4880
4881         return kernel_addr;
4882 }
4883
4884 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4885                         dma_addr_t dma_addr)
4886 {
4887         /* Cancel the device's base physical address of host memory */
4888         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4889
4890         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4891 }
4892
4893 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4894                                         size_t size, dma_addr_t *dma_handle)
4895 {
4896         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4897 }
4898
4899 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4900                                                 size_t size, void *vaddr)
4901 {
4902         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4903 }
4904
4905 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
4906                         int nents, enum dma_data_direction dir)
4907 {
4908         struct scatterlist *sg;
4909         int i;
4910
4911         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
4912                 return -ENOMEM;
4913
4914         /* Shift to the device's base physical address of host memory */
4915         for_each_sg(sgl, sg, nents, i)
4916                 sg->dma_address += HOST_PHYS_BASE;
4917
4918         return 0;
4919 }
4920
4921 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
4922                         int nents, enum dma_data_direction dir)
4923 {
4924         struct scatterlist *sg;
4925         int i;
4926
4927         /* Cancel the device's base physical address of host memory */
4928         for_each_sg(sgl, sg, nents, i)
4929                 sg->dma_address -= HOST_PHYS_BASE;
4930
4931         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
4932 }
4933
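/*
 * Compute how many LIN_DMA packets are needed to cover the given SG
 * table, merging physically contiguous entries up to the maximum DMA
 * transfer size, and return the total size in bytes.
 */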
4934 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
4935                                         struct sg_table *sgt)
4936 {
4937         struct scatterlist *sg, *sg_next_iter;
4938         u32 count, dma_desc_cnt;
4939         u64 len, len_next;
4940         dma_addr_t addr, addr_next;
4941
4942         dma_desc_cnt = 0;
4943
4944         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
4945
4946                 len = sg_dma_len(sg);
4947                 addr = sg_dma_address(sg);
4948
4949                 if (len == 0)
4950                         break;
4951
4952                 while ((count + 1) < sgt->nents) {
4953                         sg_next_iter = sg_next(sg);
4954                         len_next = sg_dma_len(sg_next_iter);
4955                         addr_next = sg_dma_address(sg_next_iter);
4956
4957                         if (len_next == 0)
4958                                 break;
4959
4960                         if ((addr + len == addr_next) &&
4961                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4962                                 len += len_next;
4963                                 count++;
4964                                 sg = sg_next_iter;
4965                         } else {
4966                                 break;
4967                         }
4968                 }
4969
4970                 dma_desc_cnt++;
4971         }
4972
4973         return dma_desc_cnt * sizeof(struct packet_lin_dma);
4974 }
4975
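/*
 * Pin the host memory referenced by a user LIN_DMA packet (unless it is
 * already pinned for this job), DMA-map it and account for the LIN_DMA
 * packets that the patched CB will need for it.
 */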
4976 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4977                                 struct hl_cs_parser *parser,
4978                                 struct packet_lin_dma *user_dma_pkt,
4979                                 u64 addr, enum dma_data_direction dir)
4980 {
4981         struct hl_userptr *userptr;
4982         int rc;
4983
4984         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4985                         parser->job_userptr_list, &userptr))
4986                 goto already_pinned;
4987
4988         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4989         if (!userptr)
4990                 return -ENOMEM;
4991
4992         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4993                                 userptr);
4994         if (rc)
4995                 goto free_userptr;
4996
4997         list_add_tail(&userptr->job_node, parser->job_userptr_list);
4998
4999         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5000                                         userptr->sgt->nents, dir);
5001         if (rc) {
5002                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5003                 goto unpin_memory;
5004         }
5005
5006         userptr->dma_mapped = true;
5007         userptr->dir = dir;
5008
5009 already_pinned:
5010         parser->patched_cb_size +=
5011                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5012
5013         return 0;
5014
5015 unpin_memory:
5016         list_del(&userptr->job_node);
5017         hl_unpin_host_memory(hdev, userptr);
5018 free_userptr:
5019         kfree(userptr);
5020         return rc;
5021 }
5022
5023 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5024                                 struct hl_cs_parser *parser,
5025                                 struct packet_lin_dma *user_dma_pkt,
5026                                 bool src_in_host)
5027 {
5028         enum dma_data_direction dir;
5029         bool skip_host_mem_pin = false, user_memset;
5030         u64 addr;
5031         int rc = 0;
5032
5033         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5034                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5035                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5036
5037         if (src_in_host) {
5038                 if (user_memset)
5039                         skip_host_mem_pin = true;
5040
5041                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5042                 dir = DMA_TO_DEVICE;
5043                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5044         } else {
5045                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5046                 dir = DMA_FROM_DEVICE;
5047                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5048                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5049                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5050         }
5051
5052         if (skip_host_mem_pin)
5053                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5054         else
5055                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5056                                                 addr, dir);
5057
5058         return rc;
5059 }
5060
5061 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5062                                 struct hl_cs_parser *parser,
5063                                 struct packet_lin_dma *user_dma_pkt)
5064 {
5065         bool src_in_host = false;
5066         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5067                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5068                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5069
5070         dev_dbg(hdev->dev, "DMA packet details:\n");
5071         dev_dbg(hdev->dev, "source == 0x%llx\n",
5072                                 le64_to_cpu(user_dma_pkt->src_addr));
5073         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5074         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5075
5076         /*
5077          * Special handling for DMA with size 0. Bypass all validations
5078          * because no transactions will be done except for WR_COMP, which
5079          * is not a security issue
5080          */
5081         if (!le32_to_cpu(user_dma_pkt->tsize)) {
5082                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5083                 return 0;
5084         }
5085
5086         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5087                 src_in_host = true;
5088
5089         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5090                                                 src_in_host);
5091 }
5092
5093 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5094                                         struct hl_cs_parser *parser,
5095                                         struct packet_load_and_exe *user_pkt)
5096 {
5097         u32 cfg;
5098
5099         cfg = le32_to_cpu(user_pkt->cfg);
5100
5101         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5102                 dev_err(hdev->dev,
5103                         "User not allowed to use Load and Execute\n");
5104                 return -EPERM;
5105         }
5106
5107         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5108
5109         return 0;
5110 }
5111
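/*
 * Walk the user CB packet by packet, reject packets the user is not
 * allowed to submit (MSG_PROT, CP_DMA, STOP, WREG_BULK, privileged
 * LOAD_AND_EXE) and compute the size of the patched CB, including room
 * for the two trailing MSG_PROT packets.
 */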
5112 static int gaudi_validate_cb(struct hl_device *hdev,
5113                         struct hl_cs_parser *parser, bool is_mmu)
5114 {
5115         u32 cb_parsed_length = 0;
5116         int rc = 0;
5117
5118         parser->patched_cb_size = 0;
5119
5120         /* cb_user_size is more than 0, so the loop will always be executed */
5121         while (cb_parsed_length < parser->user_cb_size) {
5122                 enum packet_id pkt_id;
5123                 u16 pkt_size;
5124                 struct gaudi_packet *user_pkt;
5125
5126                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5127
5128                 pkt_id = (enum packet_id) (
5129                                 (le64_to_cpu(user_pkt->header) &
5130                                 PACKET_HEADER_PACKET_ID_MASK) >>
5131                                         PACKET_HEADER_PACKET_ID_SHIFT);
5132
5133                 if (!validate_packet_id(pkt_id)) {
5134                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5135                         rc = -EINVAL;
5136                         break;
5137                 }
5138
5139                 pkt_size = gaudi_packet_sizes[pkt_id];
5140                 cb_parsed_length += pkt_size;
5141                 if (cb_parsed_length > parser->user_cb_size) {
5142                         dev_err(hdev->dev,
5143                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5144                         rc = -EINVAL;
5145                         break;
5146                 }
5147
5148                 switch (pkt_id) {
5149                 case PACKET_MSG_PROT:
5150                         dev_err(hdev->dev,
5151                                 "User not allowed to use MSG_PROT\n");
5152                         rc = -EPERM;
5153                         break;
5154
5155                 case PACKET_CP_DMA:
5156                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5157                         rc = -EPERM;
5158                         break;
5159
5160                 case PACKET_STOP:
5161                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5162                         rc = -EPERM;
5163                         break;
5164
5165                 case PACKET_WREG_BULK:
5166                         dev_err(hdev->dev,
5167                                 "User not allowed to use WREG_BULK\n");
5168                         rc = -EPERM;
5169                         break;
5170
5171                 case PACKET_LOAD_AND_EXE:
5172                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5173                                 (struct packet_load_and_exe *) user_pkt);
5174                         break;
5175
5176                 case PACKET_LIN_DMA:
5177                         parser->contains_dma_pkt = true;
5178                         if (is_mmu)
5179                                 parser->patched_cb_size += pkt_size;
5180                         else
5181                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5182                                         (struct packet_lin_dma *) user_pkt);
5183                         break;
5184
5185                 case PACKET_WREG_32:
5186                 case PACKET_MSG_LONG:
5187                 case PACKET_MSG_SHORT:
5188                 case PACKET_REPEAT:
5189                 case PACKET_FENCE:
5190                 case PACKET_NOP:
5191                 case PACKET_ARB_POINT:
5192                         parser->patched_cb_size += pkt_size;
5193                         break;
5194
5195                 default:
5196                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5197                                 pkt_id);
5198                         rc = -EINVAL;
5199                         break;
5200                 }
5201
5202                 if (rc)
5203                         break;
5204         }
5205
5206         /*
5207          * The new CB should have space at the end for two MSG_PROT packets:
5208          * 1. A packet that will act as a completion packet
5209          * 2. A packet that will generate MSI-X interrupt
5210          */
5211         if (parser->completion)
5212                 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5213
5214         return rc;
5215 }
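/*
 * A rough sketch of the size bookkeeping done by gaudi_validate_cb(),
 * assuming a user CB that holds a single LIN_DMA packet followed by a NOP
 * (packet sizes come from gaudi_packet_sizes[]):
 *
 *   patched_cb_size = <LIN_DMA size (or the no-MMU expansion estimate)>
 *                   + <NOP size>
 *                   + 2 * sizeof(struct packet_msg_prot); /* only when
 *                                                          * parser->completion
 *                                                          */
 *
 * The buffer allocated from this size is filled later, either by memcpy
 * (MMU path) or by gaudi_patch_cb() (no-MMU path).
 */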
5216
5217 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5218                                 struct hl_cs_parser *parser,
5219                                 struct packet_lin_dma *user_dma_pkt,
5220                                 struct packet_lin_dma *new_dma_pkt,
5221                                 u32 *new_dma_pkt_size)
5222 {
5223         struct hl_userptr *userptr;
5224         struct scatterlist *sg, *sg_next_iter;
5225         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5226         u64 len, len_next;
5227         dma_addr_t dma_addr, dma_addr_next;
5228         u64 device_memory_addr, addr;
5229         enum dma_data_direction dir;
5230         struct sg_table *sgt;
5231         bool src_in_host = false;
5232         bool skip_host_mem_pin = false;
5233         bool user_memset;
5234
5235         ctl = le32_to_cpu(user_dma_pkt->ctl);
5236
5237         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5238                 src_in_host = true;
5239
5240         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5241                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5242
5243         if (src_in_host) {
5244                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5245                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5246                 dir = DMA_TO_DEVICE;
5247                 if (user_memset)
5248                         skip_host_mem_pin = true;
5249         } else {
5250                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5251                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5252                 dir = DMA_FROM_DEVICE;
5253         }
5254
5255         if ((!skip_host_mem_pin) &&
5256                 (!hl_userptr_is_pinned(hdev, addr,
5257                                         le32_to_cpu(user_dma_pkt->tsize),
5258                                         parser->job_userptr_list, &userptr))) {
5259                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5260                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5261                 return -EFAULT;
5262         }
5263
5264         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5265                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5266                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5267                 return 0;
5268         }
5269
5270         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5271
5272         sgt = userptr->sgt;
5273         dma_desc_cnt = 0;
5274
5275         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5276                 len = sg_dma_len(sg);
5277                 dma_addr = sg_dma_address(sg);
5278
5279                 if (len == 0)
5280                         break;
5281
5282                 while ((count + 1) < sgt->nents) {
5283                         sg_next_iter = sg_next(sg);
5284                         len_next = sg_dma_len(sg_next_iter);
5285                         dma_addr_next = sg_dma_address(sg_next_iter);
5286
5287                         if (len_next == 0)
5288                                 break;
5289
5290                         if ((dma_addr + len == dma_addr_next) &&
5291                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5292                                 len += len_next;
5293                                 count++;
5294                                 sg = sg_next_iter;
5295                         } else {
5296                                 break;
5297                         }
5298                 }
5299
5300                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5301                 if (likely(dma_desc_cnt))
5302                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5303                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5304                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5305                 new_dma_pkt->tsize = cpu_to_le32(len);
5306
5307                 if (dir == DMA_TO_DEVICE) {
5308                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5309                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5310                 } else {
5311                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5312                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5313                 }
5314
5315                 if (!user_memset)
5316                         device_memory_addr += len;
5317                 dma_desc_cnt++;
5318                 new_dma_pkt++;
5319         }
5320
5321         if (!dma_desc_cnt) {
5322                 dev_err(hdev->dev,
5323                         "Error: no SG entries when patching DMA packet\n");
5324                 return -EFAULT;
5325         }
5326
5327         /* Fix the last dma packet - wrcomp must be as user set it */
5328         new_dma_pkt--;
5329         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5330
5331         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5332
5333         return 0;
5334 }
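/*
 * The inner loop above coalesces physically contiguous SG entries into a
 * single LIN_DMA descriptor, as long as the merged size stays within
 * DMA_MAX_TRANSFER_SIZE. A hypothetical example (addresses are made up):
 *
 *   SG entries:      {0x1000, 4K}, {0x2000, 4K}, {0x9000, 4K}
 *   Patched packets: {src/dst 0x1000, tsize 8K}, {src/dst 0x9000, tsize 4K}
 *
 * Only the last emitted packet keeps the user's WR_COMP enable bit, so a
 * single write-completion is generated for the whole transfer.
 */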
5335
5336 static int gaudi_patch_cb(struct hl_device *hdev,
5337                                 struct hl_cs_parser *parser)
5338 {
5339         u32 cb_parsed_length = 0;
5340         u32 cb_patched_cur_length = 0;
5341         int rc = 0;
5342
5343         /* user_cb_size is more than 0 so the loop will always be executed */
5344         while (cb_parsed_length < parser->user_cb_size) {
5345                 enum packet_id pkt_id;
5346                 u16 pkt_size;
5347                 u32 new_pkt_size = 0;
5348                 struct gaudi_packet *user_pkt, *kernel_pkt;
5349
5350                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5351                 kernel_pkt = parser->patched_cb->kernel_address +
5352                                         cb_patched_cur_length;
5353
5354                 pkt_id = (enum packet_id) (
5355                                 (le64_to_cpu(user_pkt->header) &
5356                                 PACKET_HEADER_PACKET_ID_MASK) >>
5357                                         PACKET_HEADER_PACKET_ID_SHIFT);
5358
5359                 if (!validate_packet_id(pkt_id)) {
5360                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5361                         rc = -EINVAL;
5362                         break;
5363                 }
5364
5365                 pkt_size = gaudi_packet_sizes[pkt_id];
5366                 cb_parsed_length += pkt_size;
5367                 if (cb_parsed_length > parser->user_cb_size) {
5368                         dev_err(hdev->dev,
5369                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5370                         rc = -EINVAL;
5371                         break;
5372                 }
5373
5374                 switch (pkt_id) {
5375                 case PACKET_LIN_DMA:
5376                         rc = gaudi_patch_dma_packet(hdev, parser,
5377                                         (struct packet_lin_dma *) user_pkt,
5378                                         (struct packet_lin_dma *) kernel_pkt,
5379                                         &new_pkt_size);
5380                         cb_patched_cur_length += new_pkt_size;
5381                         break;
5382
5383                 case PACKET_MSG_PROT:
5384                         dev_err(hdev->dev,
5385                                 "User not allowed to use MSG_PROT\n");
5386                         rc = -EPERM;
5387                         break;
5388
5389                 case PACKET_CP_DMA:
5390                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5391                         rc = -EPERM;
5392                         break;
5393
5394                 case PACKET_STOP:
5395                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5396                         rc = -EPERM;
5397                         break;
5398
5399                 case PACKET_WREG_32:
5400                 case PACKET_WREG_BULK:
5401                 case PACKET_MSG_LONG:
5402                 case PACKET_MSG_SHORT:
5403                 case PACKET_REPEAT:
5404                 case PACKET_FENCE:
5405                 case PACKET_NOP:
5406                 case PACKET_ARB_POINT:
5407                 case PACKET_LOAD_AND_EXE:
5408                         memcpy(kernel_pkt, user_pkt, pkt_size);
5409                         cb_patched_cur_length += pkt_size;
5410                         break;
5411
5412                 default:
5413                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5414                                 pkt_id);
5415                         rc = -EINVAL;
5416                         break;
5417                 }
5418
5419                 if (rc)
5420                         break;
5421         }
5422
5423         return rc;
5424 }
5425
5426 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5427                 struct hl_cs_parser *parser)
5428 {
5429         u64 patched_cb_handle;
5430         u32 patched_cb_size;
5431         struct hl_cb *user_cb;
5432         int rc;
5433
5434         /*
5435          * The new CB should have space at the end for two MSG_PROT packets:
5436          * 1. A packet that will act as a completion packet
5437          * 2. A packet that will generate MSI interrupt
5438          */
5439         if (parser->completion)
5440                 parser->patched_cb_size = parser->user_cb_size +
5441                                 sizeof(struct packet_msg_prot) * 2;
5442         else
5443                 parser->patched_cb_size = parser->user_cb_size;
5444
5445         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5446                                 parser->patched_cb_size, false, false,
5447                                 &patched_cb_handle);
5448
5449         if (rc) {
5450                 dev_err(hdev->dev,
5451                         "Failed to allocate patched CB for DMA CS %d\n",
5452                         rc);
5453                 return rc;
5454         }
5455
5456         patched_cb_handle >>= PAGE_SHIFT;
5457         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5458                                 (u32) patched_cb_handle);
5459         /* hl_cb_get should never fail */
5460         if (!parser->patched_cb) {
5461                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5462                         (u32) patched_cb_handle);
5463                 rc = -EFAULT;
5464                 goto out;
5465         }
5466
5467         /*
5468          * The check that parser->user_cb_size <= parser->user_cb->size was done
5469          * in validate_queue_index().
5470          */
5471         memcpy(parser->patched_cb->kernel_address,
5472                 parser->user_cb->kernel_address,
5473                 parser->user_cb_size);
5474
5475         patched_cb_size = parser->patched_cb_size;
5476
5477         /* Validate patched CB instead of user CB */
5478         user_cb = parser->user_cb;
5479         parser->user_cb = parser->patched_cb;
5480         rc = gaudi_validate_cb(hdev, parser, true);
5481         parser->user_cb = user_cb;
5482
5483         if (rc) {
5484                 hl_cb_put(parser->patched_cb);
5485                 goto out;
5486         }
5487
5488         if (patched_cb_size != parser->patched_cb_size) {
5489                 dev_err(hdev->dev, "user CB size mismatch\n");
5490                 hl_cb_put(parser->patched_cb);
5491                 rc = -EINVAL;
5492                 goto out;
5493         }
5494
5495 out:
5496         /*
5497          * Always call cb destroy here because we still hold one reference
5498          * to it from the earlier cb_get. After the job is completed,
5499          * cb_put will release it, but here we want to remove it from the
5500          * idr
5501          */
5502         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5503                                         patched_cb_handle << PAGE_SHIFT);
5504
5505         return rc;
5506 }
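/*
 * Note on gaudi_parse_cb_mmu(): with the MMU enabled nothing inside the user
 * CB is rewritten, so the size computed up-front (user_cb_size, plus
 * 2 * sizeof(struct packet_msg_prot) when a completion is requested) must
 * match the size gaudi_validate_cb() re-computes while walking the copied
 * CB; any mismatch means the CB is inconsistent and the CS is rejected.
 */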
5507
5508 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5509                 struct hl_cs_parser *parser)
5510 {
5511         u64 patched_cb_handle;
5512         int rc;
5513
5514         rc = gaudi_validate_cb(hdev, parser, false);
5515
5516         if (rc)
5517                 goto free_userptr;
5518
5519         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5520                                 parser->patched_cb_size, false, false,
5521                                 &patched_cb_handle);
5522         if (rc) {
5523                 dev_err(hdev->dev,
5524                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5525                 goto free_userptr;
5526         }
5527
5528         patched_cb_handle >>= PAGE_SHIFT;
5529         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5530                                 (u32) patched_cb_handle);
5531         /* hl_cb_get should never fail here */
5532         if (!parser->patched_cb) {
5533                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5534                                 (u32) patched_cb_handle);
5535                 rc = -EFAULT;
5536                 goto out;
5537         }
5538
5539         rc = gaudi_patch_cb(hdev, parser);
5540
5541         if (rc)
5542                 hl_cb_put(parser->patched_cb);
5543
5544 out:
5545         /*
5546          * Always call cb destroy here because we still hold one reference
5547          * to it from the earlier cb_get. After the job is completed,
5548          * cb_put will release it, but here we want to remove it from the
5549          * idr
5550          */
5551         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5552                                 patched_cb_handle << PAGE_SHIFT);
5553
5554 free_userptr:
5555         if (rc)
5556                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5557         return rc;
5558 }
5559
5560 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5561                                         struct hl_cs_parser *parser)
5562 {
5563         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5564         struct gaudi_device *gaudi = hdev->asic_specific;
5565         u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5566                 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5567
5568         if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5569                         (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5570                         (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5571                 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5572                                 parser->hw_queue_id);
5573                 return -EINVAL;
5574         }
5575
5576         /* For internal queue jobs just check if CB address is valid */
5577         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5578                                         parser->user_cb_size,
5579                                         asic_prop->sram_user_base_address,
5580                                         asic_prop->sram_end_address))
5581                 return 0;
5582
5583         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5584                                         parser->user_cb_size,
5585                                         asic_prop->dram_user_base_address,
5586                                         asic_prop->dram_end_address))
5587                 return 0;
5588
5589         /* PMMU and HPMMU addresses are equal, check only one of them */
5590         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5591                                         parser->user_cb_size,
5592                                         asic_prop->pmmu.start_addr,
5593                                         asic_prop->pmmu.end_addr))
5594                 return 0;
5595
5596         dev_err(hdev->dev,
5597                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5598                 parser->user_cb, parser->user_cb_size);
5599
5600         return -EFAULT;
5601 }
5602
5603 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5604 {
5605         struct gaudi_device *gaudi = hdev->asic_specific;
5606
5607         if (parser->queue_type == QUEUE_TYPE_INT)
5608                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5609
5610         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5611                 return gaudi_parse_cb_mmu(hdev, parser);
5612         else
5613                 return gaudi_parse_cb_no_mmu(hdev, parser);
5614 }
5615
5616 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5617                                         void *kernel_address, u32 len,
5618                                         u64 cq_addr, u32 cq_val, u32 msi_vec,
5619                                         bool eb)
5620 {
5621         struct gaudi_device *gaudi = hdev->asic_specific;
5622         struct packet_msg_prot *cq_pkt;
5623         u32 tmp;
5624
5625         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5626
5627         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5628         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5629
5630         if (eb)
5631                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5632
5633         cq_pkt->ctl = cpu_to_le32(tmp);
5634         cq_pkt->value = cpu_to_le32(cq_val);
5635         cq_pkt->addr = cpu_to_le64(cq_addr);
5636
5637         cq_pkt++;
5638
5639         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5640         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5641         cq_pkt->ctl = cpu_to_le32(tmp);
5642         cq_pkt->value = cpu_to_le32(1);
5643
5644         if (!gaudi->multi_msi_mode)
5645                 msi_vec = 0;
5646
5647         cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5648 }
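/*
 * Rough sketch of the two trailing MSG_PROT packets this helper writes at
 * the end of a job CB (field values are illustrative):
 *
 *   pkt[0]: addr = cq_addr, value = cq_val
 *           -> signals completion to the queue's CQ entry
 *   pkt[1]: addr = CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4, value = 1
 *           -> fires the MSI/MSI-X interrupt (vector 0 unless multi-MSI
 *              mode is enabled)
 */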
5649
5650 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5651 {
5652         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5653 }
5654
5655 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5656                                         u32 size, u64 val)
5657 {
5658         struct packet_lin_dma *lin_dma_pkt;
5659         struct hl_cs_job *job;
5660         u32 cb_size, ctl, err_cause;
5661         struct hl_cb *cb;
5662         u64 id;
5663         int rc;
5664
5665         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5666         if (!cb)
5667                 return -EFAULT;
5668
5669         lin_dma_pkt = cb->kernel_address;
5670         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5671         cb_size = sizeof(*lin_dma_pkt);
5672
5673         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5674         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5675         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5676         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5677         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5678
5679         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5680         lin_dma_pkt->src_addr = cpu_to_le64(val);
5681         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5682         lin_dma_pkt->tsize = cpu_to_le32(size);
5683
5684         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5685         if (!job) {
5686                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5687                 rc = -ENOMEM;
5688                 goto release_cb;
5689         }
5690
5691         /* Verify DMA is OK */
5692         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5693         if (err_cause && !hdev->init_done) {
5694                 dev_dbg(hdev->dev,
5695                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5696                         err_cause);
5697                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5698         }
5699
5700         job->id = 0;
5701         job->user_cb = cb;
5702         atomic_inc(&job->user_cb->cs_cnt);
5703         job->user_cb_size = cb_size;
5704         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5705         job->patched_cb = job->user_cb;
5706         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5707
5708         hl_debugfs_add_job(hdev, job);
5709
5710         rc = gaudi_send_job_on_qman0(hdev, job);
5711         hl_debugfs_remove_job(hdev, job);
5712         kfree(job);
5713         atomic_dec(&cb->cs_cnt);
5714
5715         /* Verify DMA is OK */
5716         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5717         if (err_cause) {
5718                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5719                 rc = -EIO;
5720                 if (!hdev->init_done) {
5721                         dev_dbg(hdev->dev,
5722                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5723                                 err_cause);
5724                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5725                 }
5726         }
5727
5728 release_cb:
5729         id = cb->id;
5730         hl_cb_put(cb);
5731         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5732
5733         return rc;
5734 }
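/*
 * In gaudi_memset_device_memory() the LIN_DMA packet is built in "memset"
 * mode (GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK set), so src_addr carries the
 * 64-bit fill pattern rather than a source address; only dst_addr and tsize
 * describe real device memory. For example, clearing the MMU page tables
 * boils down to a single packet with src_addr == 0,
 * dst_addr == prop->mmu_pgt_addr and
 * tsize == prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE.
 */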
5735
5736 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5737                                         u32 num_regs, u32 val)
5738 {
5739         struct packet_msg_long *pkt;
5740         struct hl_cs_job *job;
5741         u32 cb_size, ctl;
5742         struct hl_cb *cb;
5743         int i, rc;
5744
5745         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5746
5747         if (cb_size > SZ_2M) {
5748                 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M / SZ_1M);
5749                 return -ENOMEM;
5750         }
5751
5752         cb = hl_cb_kernel_create(hdev, cb_size, false);
5753         if (!cb)
5754                 return -EFAULT;
5755
5756         pkt = cb->kernel_address;
5757
5758         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5759         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5760         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5761         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5762         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5763
5764         for (i = 0; i < num_regs ; i++, pkt++) {
5765                 pkt->ctl = cpu_to_le32(ctl);
5766                 pkt->value = cpu_to_le32(val);
5767                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5768         }
5769
5770         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5771         if (!job) {
5772                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5773                 rc = -ENOMEM;
5774                 goto release_cb;
5775         }
5776
5777         job->id = 0;
5778         job->user_cb = cb;
5779         atomic_inc(&job->user_cb->cs_cnt);
5780         job->user_cb_size = cb_size;
5781         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5782         job->patched_cb = job->user_cb;
5783         job->job_cb_size = cb_size;
5784
5785         hl_debugfs_add_job(hdev, job);
5786
5787         rc = gaudi_send_job_on_qman0(hdev, job);
5788         hl_debugfs_remove_job(hdev, job);
5789         kfree(job);
5790         atomic_dec(&cb->cs_cnt);
5791
5792 release_cb:
5793         hl_cb_put(cb);
5794         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5795
5796         return rc;
5797 }
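/*
 * A minimal usage sketch for gaudi_memset_registers(), assuming the caller
 * wants to clear 8 consecutive 32-bit registers starting at some block base
 * (mmSOME_REG_BASE is hypothetical):
 *
 *   rc = gaudi_memset_registers(hdev, CFG_BASE + mmSOME_REG_BASE, 8, 0);
 *
 * Internally this builds one MSG_LONG packet per register plus a trailing
 * MSG_PROT, i.e. cb_size = 8 * sizeof(struct packet_msg_long) +
 * sizeof(struct packet_msg_prot), and sends the job through QMAN0.
 */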
5798
5799 static int gaudi_schedule_register_memset(struct hl_device *hdev,
5800                 u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
5801 {
5802         struct hl_ctx *ctx;
5803         struct hl_pending_cb *pending_cb;
5804         struct packet_msg_long *pkt;
5805         u32 cb_size, ctl;
5806         struct hl_cb *cb;
5807         int i, rc;
5808
5809         mutex_lock(&hdev->fpriv_list_lock);
5810         ctx = hdev->compute_ctx;
5811
5812         /* If no compute context is available or the context is going down,
5813          * memset the registers directly
5814          */
5815         if (!ctx || kref_read(&ctx->refcount) == 0) {
5816                 rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
5817                 mutex_unlock(&hdev->fpriv_list_lock);
5818                 return rc;
5819         }
5820
5821         mutex_unlock(&hdev->fpriv_list_lock);
5822
5823         cb_size = (sizeof(*pkt) * num_regs) +
5824                         sizeof(struct packet_msg_prot) * 2;
5825
5826         if (cb_size > SZ_2M) {
5827                 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M / SZ_1M);
5828                 return -ENOMEM;
5829         }
5830
5831         pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
5832         if (!pending_cb)
5833                 return -ENOMEM;
5834
5835         cb = hl_cb_kernel_create(hdev, cb_size, false);
5836         if (!cb) {
5837                 kfree(pending_cb);
5838                 return -EFAULT;
5839         }
5840
5841         pkt = cb->kernel_address;
5842
5843         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5844         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5845         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5846         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5847         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5848
5849         for (i = 0; i < num_regs ; i++, pkt++) {
5850                 pkt->ctl = cpu_to_le32(ctl);
5851                 pkt->value = cpu_to_le32(val);
5852                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5853         }
5854
5855         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5856
5857         pending_cb->cb = cb;
5858         pending_cb->cb_size = cb_size;
5859         /* The queue ID MUST be an external queue ID. Otherwise, we will
5860          * have undefined behavior
5861          */
5862         pending_cb->hw_queue_id = hw_queue_id;
5863
5864         spin_lock(&ctx->pending_cb_lock);
5865         list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
5866         spin_unlock(&ctx->pending_cb_lock);
5867
5868         return 0;
5869 }
5870
5871 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5872 {
5873         u64 base_addr;
5874         u32 num_regs;
5875         int rc;
5876
5877         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5878         num_regs = NUM_OF_SOB_IN_BLOCK;
5879         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5880         if (rc) {
5881                 dev_err(hdev->dev, "failed resetting SM registers");
5882                 return -ENOMEM;
5883         }
5884
5885         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5886         num_regs = NUM_OF_SOB_IN_BLOCK;
5887         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5888         if (rc) {
5889                 dev_err(hdev->dev, "failed resetting SM registers");
5890                 return -ENOMEM;
5891         }
5892
5893         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5894         num_regs = NUM_OF_SOB_IN_BLOCK;
5895         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5896         if (rc) {
5897                 dev_err(hdev->dev, "failed resetting SM registers");
5898                 return -ENOMEM;
5899         }
5900
5901         base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5902         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5903         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5904         if (rc) {
5905                 dev_err(hdev->dev, "failed resetting SM registers");
5906                 return -ENOMEM;
5907         }
5908
5909         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5910         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5911         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5912         if (rc) {
5913                 dev_err(hdev->dev, "failed resetting SM registers");
5914                 return -ENOMEM;
5915         }
5916
5917         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5918         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5919         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5920         if (rc) {
5921                 dev_err(hdev->dev, "failed resetting SM registers");
5922                 return -ENOMEM;
5923         }
5924
5925         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5926                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5927         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5928         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5929         if (rc) {
5930                 dev_err(hdev->dev, "failed resetting SM registers");
5931                 return -ENOMEM;
5932         }
5933
5934         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5935                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5936         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5937         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5938         if (rc) {
5939                 dev_err(hdev->dev, "failed resetting SM registers");
5940                 return -ENOMEM;
5941         }
5942
5943         return 0;
5944 }
5945
5946 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5947 {
5948         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5949                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5950         int i;
5951
5952         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5953                 u64 sob_addr = CFG_BASE +
5954                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5955                                 (i * sob_delta);
5956                 u32 dma_offset = i * DMA_CORE_OFFSET;
5957
5958                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5959                                 lower_32_bits(sob_addr));
5960                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5961                                 upper_32_bits(sob_addr));
5962                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5963
5964                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5965                  * modified by the user for SRAM reduction
5966                  */
5967                 if (i > 1)
5968                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5969                                                                 0x00000001);
5970         }
5971 }
5972
5973 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5974 {
5975         u32 qman_offset;
5976         int i;
5977
5978         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5979                 qman_offset = i * DMA_QMAN_OFFSET;
5980                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5981         }
5982
5983         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5984                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5985                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5986         }
5987
5988         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5989                 qman_offset = i * TPC_QMAN_OFFSET;
5990                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5991         }
5992
5993         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5994                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5995                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5996                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5997         }
5998 }
5999
6000 static int gaudi_restore_user_registers(struct hl_device *hdev)
6001 {
6002         int rc;
6003
6004         rc = gaudi_restore_sm_registers(hdev);
6005         if (rc)
6006                 return rc;
6007
6008         gaudi_restore_dma_registers(hdev);
6009         gaudi_restore_qm_registers(hdev);
6010
6011         return 0;
6012 }
6013
6014 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6015 {
6016         return gaudi_restore_user_registers(hdev);
6017 }
6018
6019 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6020 {
6021         struct asic_fixed_properties *prop = &hdev->asic_prop;
6022         struct gaudi_device *gaudi = hdev->asic_specific;
6023         u64 addr = prop->mmu_pgt_addr;
6024         u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6025
6026         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6027                 return 0;
6028
6029         return gaudi_memset_device_memory(hdev, addr, size, 0);
6030 }
6031
6032 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6033 {
6034
6035 }
6036
6037 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6038                         bool user_address, u32 *val)
6039 {
6040         struct asic_fixed_properties *prop = &hdev->asic_prop;
6041         struct gaudi_device *gaudi = hdev->asic_specific;
6042         u64 hbm_bar_addr, host_phys_end;
6043         int rc = 0;
6044
6045         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6046
6047         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6048
6049                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6050                                 (hdev->clock_gating_mask &
6051                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6052
6053                         dev_err_ratelimited(hdev->dev,
6054                                 "Can't read register - clock gating is enabled!\n");
6055                         rc = -EFAULT;
6056                 } else {
6057                         *val = RREG32(addr - CFG_BASE);
6058                 }
6059
6060         } else if ((addr >= SRAM_BASE_ADDR) &&
6061                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6062                 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6063                                 (addr - SRAM_BASE_ADDR));
6064         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6065                 u64 bar_base_addr = DRAM_PHYS_BASE +
6066                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6067
6068                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6069                 if (hbm_bar_addr != U64_MAX) {
6070                         *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6071                                                 (addr - bar_base_addr));
6072
6073                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6074                                                 hbm_bar_addr);
6075                 }
6076                 if (hbm_bar_addr == U64_MAX)
6077                         rc = -EIO;
6078         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6079                         user_address && !iommu_present(&pci_bus_type)) {
6080                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6081         } else {
6082                 rc = -EFAULT;
6083         }
6084
6085         return rc;
6086 }
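/*
 * The debugfs access helpers (gaudi_debugfs_read32/write32/read64/write64)
 * all dispatch on the target address the same way:
 *
 *   CFG_BASE .. CFG_BASE + CFG_SIZE        -> RREG32/WREG32 (refused while
 *                                             clock gating is enabled)
 *   SRAM_BASE_ADDR .. +SRAM_BAR_SIZE       -> direct access through the SRAM
 *                                             BAR
 *   below DRAM_PHYS_BASE + dram_size       -> access through the HBM BAR,
 *                                             after moving its window with
 *                                             gaudi_set_hbm_bar_base()
 *   HOST_PHYS_BASE .. +HOST_PHYS_SIZE      -> phys_to_virt(), only for user
 *                                             addresses and only when no
 *                                             IOMMU is present
 */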
6087
6088 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6089                         bool user_address, u32 val)
6090 {
6091         struct asic_fixed_properties *prop = &hdev->asic_prop;
6092         struct gaudi_device *gaudi = hdev->asic_specific;
6093         u64 hbm_bar_addr, host_phys_end;
6094         int rc = 0;
6095
6096         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6097
6098         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6099
6100                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6101                                 (hdev->clock_gating_mask &
6102                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6103
6104                         dev_err_ratelimited(hdev->dev,
6105                                 "Can't write register - clock gating is enabled!\n");
6106                         rc = -EFAULT;
6107                 } else {
6108                         WREG32(addr - CFG_BASE, val);
6109                 }
6110
6111         } else if ((addr >= SRAM_BASE_ADDR) &&
6112                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6113                 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6114                                         (addr - SRAM_BASE_ADDR));
6115         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6116                 u64 bar_base_addr = DRAM_PHYS_BASE +
6117                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6118
6119                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6120                 if (hbm_bar_addr != U64_MAX) {
6121                         writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6122                                                 (addr - bar_base_addr));
6123
6124                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6125                                                 hbm_bar_addr);
6126                 }
6127                 if (hbm_bar_addr == U64_MAX)
6128                         rc = -EIO;
6129         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6130                         user_address && !iommu_present(&pci_bus_type)) {
6131                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6132         } else {
6133                 rc = -EFAULT;
6134         }
6135
6136         return rc;
6137 }
6138
6139 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6140                                 bool user_address, u64 *val)
6141 {
6142         struct asic_fixed_properties *prop = &hdev->asic_prop;
6143         struct gaudi_device *gaudi = hdev->asic_specific;
6144         u64 hbm_bar_addr, host_phys_end;
6145         int rc = 0;
6146
6147         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6148
6149         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6150
6151                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6152                                 (hdev->clock_gating_mask &
6153                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6154
6155                         dev_err_ratelimited(hdev->dev,
6156                                 "Can't read register - clock gating is enabled!\n");
6157                         rc = -EFAULT;
6158                 } else {
6159                         u32 val_l = RREG32(addr - CFG_BASE);
6160                         u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6161
6162                         *val = (((u64) val_h) << 32) | val_l;
6163                 }
6164
6165         } else if ((addr >= SRAM_BASE_ADDR) &&
6166                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6167                 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6168                                 (addr - SRAM_BASE_ADDR));
6169         } else if (addr <=
6170                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6171                 u64 bar_base_addr = DRAM_PHYS_BASE +
6172                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6173
6174                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6175                 if (hbm_bar_addr != U64_MAX) {
6176                         *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6177                                                 (addr - bar_base_addr));
6178
6179                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6180                                                 hbm_bar_addr);
6181                 }
6182                 if (hbm_bar_addr == U64_MAX)
6183                         rc = -EIO;
6184         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6185                         user_address && !iommu_present(&pci_bus_type)) {
6186                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6187         } else {
6188                 rc = -EFAULT;
6189         }
6190
6191         return rc;
6192 }
6193
6194 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6195                                 bool user_address, u64 val)
6196 {
6197         struct asic_fixed_properties *prop = &hdev->asic_prop;
6198         struct gaudi_device *gaudi = hdev->asic_specific;
6199         u64 hbm_bar_addr, host_phys_end;
6200         int rc = 0;
6201
6202         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6203
6204         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6205
6206                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6207                                 (hdev->clock_gating_mask &
6208                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6209
6210                         dev_err_ratelimited(hdev->dev,
6211                                 "Can't write register - clock gating is enabled!\n");
6212                         rc = -EFAULT;
6213                 } else {
6214                         WREG32(addr - CFG_BASE, lower_32_bits(val));
6215                         WREG32(addr + sizeof(u32) - CFG_BASE,
6216                                 upper_32_bits(val));
6217                 }
6218
6219         } else if ((addr >= SRAM_BASE_ADDR) &&
6220                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6221                 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6222                                         (addr - SRAM_BASE_ADDR));
6223         } else if (addr <=
6224                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6225                 u64 bar_base_addr = DRAM_PHYS_BASE +
6226                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6227
6228                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6229                 if (hbm_bar_addr != U64_MAX) {
6230                         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6231                                                 (addr - bar_base_addr));
6232
6233                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6234                                                 hbm_bar_addr);
6235                 }
6236                 if (hbm_bar_addr == U64_MAX)
6237                         rc = -EIO;
6238         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6239                         user_address && !iommu_present(&pci_bus_type)) {
6240                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6241         } else {
6242                 rc = -EFAULT;
6243         }
6244
6245         return rc;
6246 }
6247
6248 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6249                                         u32 size_to_dma, dma_addr_t dma_addr)
6250 {
6251         u32 err_cause, val;
6252         u64 dma_offset;
6253         int rc;
6254
6255         dma_offset = dma_id * DMA_CORE_OFFSET;
6256
6257         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6258         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6259         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6260         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6261         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6262         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6263                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6264
6265         rc = hl_poll_timeout(
6266                 hdev,
6267                 mmDMA0_CORE_STS0 + dma_offset,
6268                 val,
6269                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6270                 0,
6271                 1000000);
6272
6273         if (rc) {
6274                 dev_err(hdev->dev,
6275                         "DMA %d timed out while reading from 0x%llx\n",
6276                         dma_id, addr);
6277                 return -EIO;
6278         }
6279
6280         /* Verify DMA is OK */
6281         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6282         if (err_cause) {
6283                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6284                 dev_dbg(hdev->dev,
6285                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6286                         err_cause);
6287                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6288
6289                 return -EIO;
6290         }
6291
6292         return 0;
6293 }
6294
6295 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6296                                 void *blob_addr)
6297 {
6298         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6299         struct gaudi_device *gaudi = hdev->asic_specific;
6300         u64 dma_offset, qm_offset;
6301         dma_addr_t dma_addr;
6302         void *kernel_addr;
6303         bool is_eng_idle;
6304         int rc = 0, dma_id;
6305
6306         kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6307                                                 hdev, SZ_2M,
6308                                                 &dma_addr,
6309                                                 GFP_KERNEL | __GFP_ZERO);
6310
6311         if (!kernel_addr)
6312                 return -ENOMEM;
6313
6314         mutex_lock(&gaudi->clk_gate_mutex);
6315
6316         hdev->asic_funcs->disable_clock_gating(hdev);
6317
6318         hdev->asic_funcs->hw_queues_lock(hdev);
6319
6320         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6321         dma_offset = dma_id * DMA_CORE_OFFSET;
6322         qm_offset = dma_id * DMA_QMAN_OFFSET;
6323         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6324         is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6325
6326         if (!is_eng_idle) {
6327                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6328                 dma_offset = dma_id * DMA_CORE_OFFSET;
6329                 qm_offset = dma_id * DMA_QMAN_OFFSET;
6330                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6331                 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6332
6333                 if (!is_eng_idle) {
6334                         dev_err_ratelimited(hdev->dev,
6335                                 "Can't read via DMA because it is BUSY\n");
6336                         rc = -EAGAIN;
6337                         goto out;
6338                 }
6339         }
6340
6341         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6342         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6343                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6344
6345         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6346          * using the compute ctx ASID, if exists. If not, use the kernel ctx
6347          * ASID
6348          */
6349         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6350
6351         /* Verify DMA is OK */
6352         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6353         if (err_cause) {
6354                 dev_dbg(hdev->dev,
6355                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6356                         err_cause);
6357                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6358         }
6359
6360         pos = 0;
6361         size_left = size;
6362         size_to_dma = SZ_2M;
6363
6364         while (size_left > 0) {
6365
6366                 if (size_left < SZ_2M)
6367                         size_to_dma = size_left;
6368
6369                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6370                                                 dma_addr);
6371                 if (rc)
6372                         break;
6373
6374                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6375
6376                 if (size_left <= SZ_2M)
6377                         break;
6378
6379                 pos += SZ_2M;
6380                 addr += SZ_2M;
6381                 size_left -= SZ_2M;
6382         }
6383
6384         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6385          * using the compute ctx ASID, if exists. If not, use the kernel ctx
6386          * ASID
6387          */
6388         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6389                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6390
6391         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6392
6393 out:
6394         hdev->asic_funcs->hw_queues_unlock(hdev);
6395
6396         hdev->asic_funcs->set_clock_gating(hdev);
6397
6398         mutex_unlock(&gaudi->clk_gate_mutex);
6399
6400         hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6401                                                 dma_addr);
6402
6403         return rc;
6404 }
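/*
 * Chunking example for gaudi_debugfs_read_dma(), assuming a 5MB read (sizes
 * are illustrative): the 2MB bounce buffer is reused for three transfers of
 * 2MB, 2MB and 1MB, each one copied into blob_addr with memcpy() before the
 * next gaudi_dma_core_transfer() is issued.
 */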
6405
6406 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6407 {
6408         struct gaudi_device *gaudi = hdev->asic_specific;
6409
6410         if (hdev->hard_reset_pending)
6411                 return U64_MAX;
6412
6413         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6414                         (addr - gaudi->hbm_bar_cur_addr));
6415 }
6416
6417 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6418 {
6419         struct gaudi_device *gaudi = hdev->asic_specific;
6420
6421         if (hdev->hard_reset_pending)
6422                 return;
6423
6424         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6425                         (addr - gaudi->hbm_bar_cur_addr));
6426 }
6427
6428 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6429 {
6430         /* mask to zero the MMBP and ASID bits */
6431         WREG32_AND(reg, ~0x7FF);
6432         WREG32_OR(reg, asid);
6433 }
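/*
 * gaudi_mmu_prepare_reg() is a read-modify-write of the low bits of the
 * given register. Assuming the 0x7FF mask covers the 10-bit ASID field plus
 * the MMU-bypass bit above it, a call with asid == 3 on a register whose
 * bypass bit is currently set would do, schematically:
 *
 *   new = (old & ~0x7FF) | 3;
 *
 * i.e. bypass is cleared and the engine's transactions now go through the
 * MMU with ASID 3.
 */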
6434
6435 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6436 {
6437         struct gaudi_device *gaudi = hdev->asic_specific;
6438
6439         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6440                 return;
6441
6442         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6443                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6444                 return;
6445         }
6446
6447         mutex_lock(&gaudi->clk_gate_mutex);
6448
6449         hdev->asic_funcs->disable_clock_gating(hdev);
6450
6451         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6452         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6453         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6454         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6455         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6456
6457         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6458         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6459         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6460         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6461         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6462
6463         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6464         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6465         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6466         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6467         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6468
6469         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6470         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6471         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6472         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6473         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6474
6475         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6476         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6477         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6478         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6479         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6480
6481         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6482         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6483         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6484         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6485         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6486
6487         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6488         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6489         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6490         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6491         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6492
6493         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6494         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6495         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6496         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6497         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6498
6499         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6500         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6501         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6502         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6503         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6504         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6505         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6506         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6507
6508         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6509         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6510         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6511         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6512         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6513         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6514         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6515
6516         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6517         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6518         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6519         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6520         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6521         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6522         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6523
6524         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6525         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6526         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6527         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6528         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6529         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6530         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6531
6532         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6533         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6534         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6535         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6536         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6537         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6538         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6539
6540         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6541         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6542         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6543         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6544         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6545         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6546         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6547
6548         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6549         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6550         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6551         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6552         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6553         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6554         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6555
6556         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6557         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6558         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6559         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6560         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6561         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6562         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6563
6564         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6565         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6566         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6567         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6568         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6569         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6570         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6571
6572         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6573         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6574         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6575         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6576         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6577         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6578         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6579         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6580         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6581         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6582
6583         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6584         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6585         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6586         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6587         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6588         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6589         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6590         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6591         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6592         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6593         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6594         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6595
6596         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6597                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6598                                 asid);
6599                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6600                                 asid);
6601                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6602                                 asid);
6603                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6604                                 asid);
6605                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6606                                 asid);
6607         }
6608
6609         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6610                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6611                                 asid);
6612                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6613                                 asid);
6614                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6615                                 asid);
6616                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6617                                 asid);
6618                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6619                                 asid);
6620         }
6621
6622         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6623                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6624                                 asid);
6625                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6626                                 asid);
6627                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6628                                 asid);
6629                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6630                                 asid);
6631                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6632                                 asid);
6633         }
6634
6635         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6636                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6637                                 asid);
6638                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6639                                 asid);
6640                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6641                                 asid);
6642                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6643                                 asid);
6644                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6645                                 asid);
6646         }
6647
6648         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6649                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6650                                 asid);
6651                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6652                                 asid);
6653                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6654                                 asid);
6655                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6656                                 asid);
6657                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6658                                 asid);
6659         }
6660
6661         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6662                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6663                                 asid);
6664                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6665                                 asid);
6666                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6667                                 asid);
6668                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6669                                 asid);
6670                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6671                                 asid);
6672         }
6673
6674         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6675                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6676                                 asid);
6677                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6678                                 asid);
6679                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6680                                 asid);
6681                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6682                                 asid);
6683                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6684                                 asid);
6685         }
6686
6687         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6688                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6689                                 asid);
6690                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6691                                 asid);
6692                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6693                                 asid);
6694                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6695                                 asid);
6696                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6697                                 asid);
6698         }
6699
6700         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6701                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6702                                 asid);
6703                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6704                                 asid);
6705                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6706                                 asid);
6707                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6708                                 asid);
6709                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6710                                 asid);
6711         }
6712
6713         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6714                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6715                                 asid);
6716                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6717                                 asid);
6718                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6719                                 asid);
6720                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6721                                 asid);
6722                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6723                                 asid);
6724         }
6725
6726         hdev->asic_funcs->set_clock_gating(hdev);
6727
6728         mutex_unlock(&gaudi->clk_gate_mutex);
6729 }
6730
6731 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6732                 struct hl_cs_job *job)
6733 {
6734         struct packet_msg_prot *fence_pkt;
6735         u32 *fence_ptr;
6736         dma_addr_t fence_dma_addr;
6737         struct hl_cb *cb;
6738         u32 tmp, timeout, dma_offset;
6739         int rc;
6740
6741         if (hdev->pldm)
6742                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6743         else
6744                 timeout = HL_DEVICE_TIMEOUT_USEC;
6745
6746         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6747                 dev_err_ratelimited(hdev->dev,
6748                         "Can't send driver job on QMAN0 because the device is not idle\n");
6749                 return -EBUSY;
6750         }
6751
6752         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6753                                                         &fence_dma_addr);
6754         if (!fence_ptr) {
6755                 dev_err(hdev->dev,
6756                         "Failed to allocate fence memory for QMAN0\n");
6757                 return -ENOMEM;
6758         }
6759
6760         cb = job->patched_cb;
6761
6762         fence_pkt = cb->kernel_address +
6763                         job->job_cb_size - sizeof(struct packet_msg_prot);
6764
6765         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6766         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6767         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6768
6769         fence_pkt->ctl = cpu_to_le32(tmp);
6770         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6771         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6772
6773         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6774
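        /*
         * Descriptive note (inferred from this function only): set the
         * protection (VAL) bit on the selected PCI DMA core before sending
         * the driver job on its QMAN; the bit is cleared again at
         * free_fence_ptr below, whether the fence completed or an error
         * occurred.
         */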
6775         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6776
6777         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6778                                         job->job_cb_size, cb->bus_address);
6779         if (rc) {
6780                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6781                 goto free_fence_ptr;
6782         }
6783
6784         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6785                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6786                                 timeout, true);
6787
6788         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6789
6790         if (rc == -ETIMEDOUT) {
6791                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6792                 goto free_fence_ptr;
6793         }
6794
6795 free_fence_ptr:
6796         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6797                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6798
6799         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6800                                         fence_dma_addr);
6801         return rc;
6802 }
6803
6804 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6805 {
6806         if (event_type >= GAUDI_EVENT_SIZE)
6807                 goto event_not_supported;
6808
6809         if (!gaudi_irq_map_table[event_type].valid)
6810                 goto event_not_supported;
6811
6812         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6813
6814         return;
6815
6816 event_not_supported:
6817         snprintf(desc, size, "N/A");
6818 }
6819
6820 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6821                                                         u32 x_y, bool is_write)
6822 {
6823         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6824
6825         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6826                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6827
6828         switch (x_y) {
6829         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6830         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6831                 dma_id[0] = 0;
6832                 dma_id[1] = 2;
6833                 break;
6834         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6835         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6836                 dma_id[0] = 1;
6837                 dma_id[1] = 3;
6838                 break;
6839         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6840         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6841                 dma_id[0] = 4;
6842                 dma_id[1] = 6;
6843                 break;
6844         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6845         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6846                 dma_id[0] = 5;
6847                 dma_id[1] = 7;
6848                 break;
6849         default:
6850                 goto unknown_initiator;
6851         }
6852
6853         for (i = 0 ; i < 2 ; i++) {
6854                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6855                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6856         }
6857
6858         switch (x_y) {
6859         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6860         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6861                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6862                         return "DMA0";
6863                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6864                         return "DMA2";
6865                 else
6866                         return "DMA0 or DMA2";
6867         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6868         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6869                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6870                         return "DMA1";
6871                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6872                         return "DMA3";
6873                 else
6874                         return "DMA1 or DMA3";
6875         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6876         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6877                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6878                         return "DMA4";
6879                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6880                         return "DMA6";
6881                 else
6882                         return "DMA4 or DMA6";
6883         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6884         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6885                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6886                         return "DMA5";
6887                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6888                         return "DMA7";
6889                 else
6890                         return "DMA5 or DMA7";
6891         }
6892
6893 unknown_initiator:
6894         return "unknown initiator";
6895 }
6896
6897 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
6898                                                         bool is_write)
6899 {
6900         u32 val, x_y, axi_id;
6901
6902         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6903                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
6904         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6905                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6906         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6907                         RAZWI_INITIATOR_AXI_ID_SHIFT);
6908
6909         switch (x_y) {
6910         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6911                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6912                         return "TPC0";
6913                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6914                         return "NIC0";
6915                 break;
6916         case RAZWI_INITIATOR_ID_X_Y_TPC1:
6917                 return "TPC1";
6918         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6919         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6920                 return "MME0";
6921         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6922         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6923                 return "MME1";
6924         case RAZWI_INITIATOR_ID_X_Y_TPC2:
6925                 return "TPC2";
6926         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6927                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6928                         return "TPC3";
6929                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6930                         return "PCI";
6931                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6932                         return "CPU";
6933                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6934                         return "PSOC";
6935                 break;
6936         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6937         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6938         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6939         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6940         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6941         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6942         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6943         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6944                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
6945         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6946                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6947                         return "TPC4";
6948                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6949                         return "NIC1";
6950                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6951                         return "NIC2";
6952                 break;
6953         case RAZWI_INITIATOR_ID_X_Y_TPC5:
6954                 return "TPC5";
6955         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6956         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6957                 return "MME2";
6958         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6959         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6960                 return "MME3";
6961         case RAZWI_INITIATOR_ID_X_Y_TPC6:
6962                 return "TPC6";
6963         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6964                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6965                         return "TPC7";
6966                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6967                         return "NIC4";
6968                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6969                         return "NIC5";
6970                 break;
6971         default:
6972                 break;
6973         }
6974
6975         dev_err(hdev->dev,
6976                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6977                 val,
6978                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6979                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6980                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6981                         RAZWI_INITIATOR_AXI_ID_MASK);
6982
6983         return "unknown initiator";
6984 }
6985
6986 static void gaudi_print_razwi_info(struct hl_device *hdev)
6987 {
6988         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6989                 dev_err_ratelimited(hdev->dev,
6990                         "RAZWI event caused by illegal write of %s\n",
6991                         gaudi_get_razwi_initiator_name(hdev, true));
6992                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6993         }
6994
6995         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6996                 dev_err_ratelimited(hdev->dev,
6997                         "RAZWI event caused by illegal read of %s\n",
6998                         gaudi_get_razwi_initiator_name(hdev, false));
6999                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7000         }
7001 }
7002
7003 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
7004 {
7005         struct gaudi_device *gaudi = hdev->asic_specific;
7006         u64 addr;
7007         u32 val;
7008
7009         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7010                 return;
7011
7012         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7013         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7014                 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7015                 addr <<= 32;
7016                 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7017
7018                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
7019                                         addr);
7020
7021                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7022         }
7023
7024         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7025         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7026                 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7027                 addr <<= 32;
7028                 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7029
7030                 dev_err_ratelimited(hdev->dev,
7031                                 "MMU access error on va 0x%llx\n", addr);
7032
7033                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7034         }
7035 }
7036
7037 /*
7038  *  +-------------------+------------------------------------------------------+
7039  *  | Configuration Reg |                     Description                      |
7040  *  |      Address      |                                                      |
7041  *  +-------------------+------------------------------------------------------+
7042  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
7043  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
7044  *  |                   |0xF34 memory wrappers 63:32                           |
7045  *  |                   |0xF38 memory wrappers 95:64                           |
7046  *  |                   |0xF3C memory wrappers 127:96                          |
7047  *  +-------------------+------------------------------------------------------+
7048  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
7049  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
7050  *  |                   |0xF44 memory wrappers 63:32                           |
7051  *  |                   |0xF48 memory wrappers 95:64                           |
7052  *  |                   |0xF4C memory wrappers 127:96                          |
7053  *  +-------------------+------------------------------------------------------+
7054  */
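/*
 * Illustrative example (based only on the table above and the extraction
 * loop below): a single-error indication for memory wrapper 70 would be
 * reported as bit 70 - 64 = 6 of the register at offset 0xF38 (wrappers
 * 95:64), and the matching double-error indication as bit 6 of the
 * register at offset 0xF48.
 */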
7055 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7056                 struct ecc_info_extract_params *params, u64 *ecc_address,
7057                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7058 {
7059         struct gaudi_device *gaudi = hdev->asic_specific;
7060         u32 i, num_mem_regs, reg, err_bit;
7061         u64 err_addr, err_word = 0;
7062         int rc = 0;
7063
7064         num_mem_regs = params->num_memories / 32 +
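        /*
         * Number of 32-bit indication registers needed to cover all memory
         * wrappers of this block (one bit per wrapper, rounded up).
         */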
7065                         ((params->num_memories % 32) ? 1 : 0);
7066
7067         if (params->block_address >= CFG_BASE)
7068                 params->block_address -= CFG_BASE;
7069
7070         if (params->derr)
7071                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7072         else
7073                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7074
7075         if (params->disable_clock_gating) {
7076                 mutex_lock(&gaudi->clk_gate_mutex);
7077                 hdev->asic_funcs->disable_clock_gating(hdev);
7078         }
7079
7080         /* Set invalid wrapper index */
7081         *memory_wrapper_idx = 0xFF;
7082
7083         /* Iterate through memory wrappers, a single bit must be set */
7084         for (i = 0 ; i < num_mem_regs ; i++) {
7085                 /* Each indication register covers 32 memory wrappers */
7086                 err_word = RREG32(err_addr + i * 4);
7087                 if (err_word) {
7088                         err_bit = __ffs(err_word);
7089                         *memory_wrapper_idx = err_bit + (32 * i);
7090                         break;
7091                 }
7092         }
7093
7094         if (*memory_wrapper_idx == 0xFF) {
7095                 dev_err(hdev->dev, "ECC error information cannot be found\n");
7096                 rc = -EINVAL;
7097                 goto enable_clk_gate;
7098         }
7099
7100         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7101                         *memory_wrapper_idx);
7102
7103         *ecc_address =
7104                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7105         *ecc_syndrom =
7106                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7107
7108         /* Clear error indication */
7109         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7110         if (params->derr)
7111                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7112         else
7113                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7114
7115         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7116
7117 enable_clk_gate:
7118         if (params->disable_clock_gating) {
7119                 hdev->asic_funcs->set_clock_gating(hdev);
7120
7121                 mutex_unlock(&gaudi->clk_gate_mutex);
7122         }
7123
7124         return rc;
7125 }
7126
7127 /*
7128  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7129  *
7130  * @idx: the current pi/ci value
7131  * @q_len: the queue length (power of 2)
7132  *
7133  * @return the cyclically decremented index
7134  */
7135 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7136 {
7137         u32 mask = q_len - 1;
7138
7139         /*
7140          * A modular decrement is equivalent to adding (q_len - 1);
7141          * we then take the LSBs to keep the value in the
7142          * range [0, q_len - 1]
7143          */
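        /*
         * Example: with q_len = 8 (mask = 0x7), decrementing idx = 0 yields
         * (0 + 8 - 1) & 0x7 = 7, and decrementing idx = 5 yields
         * (5 + 8 - 1) & 0x7 = 4.
         */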
7144         return (idx + q_len - 1) & mask;
7145 }
7146
7147 /**
7148  * gaudi_print_sw_config_stream_data - print SW config stream data
7149  *
7150  * @hdev: pointer to the habanalabs device structure
7151  * @stream: the QMAN's stream
7152  * @qman_base: base address of QMAN registers block
7153  */
7154 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7155                                                 u64 qman_base)
7156 {
7157         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7158         u32 cq_ptr_lo_off, size;
7159
7160         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7161
7162         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7163                                                 stream * cq_ptr_lo_off;
7164         cq_ptr_hi = cq_ptr_lo +
7165                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7166         cq_tsize = cq_ptr_lo +
7167                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7168
7169         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7170         size = RREG32(cq_tsize);
7171         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
7172                                                         stream, cq_ptr, size);
7173 }
7174
7175 /**
7176  * gaudi_print_last_pqes_on_err - print last PQEs on error
7177  *
7178  * @hdev: pointer to the habanalabs device structure
7179  * @qid_base: first QID of the QMAN (out of 4 streams)
7180  * @stream: the QMAN's stream
7181  * @qman_base: base address of QMAN registers block
7182  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7183  */
7184 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7185                                                 u32 stream, u64 qman_base,
7186                                                 bool pr_sw_conf)
7187 {
7188         u32 ci, qm_ci_stream_off, queue_len;
7189         struct hl_hw_queue *q;
7190         u64 pq_ci;
7191         int i;
7192
7193         q = &hdev->kernel_queues[qid_base + stream];
7194
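        /*
         * Per-stream stride of the PQ CI registers, used to locate this
         * stream's CI register below
         */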
7195         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7196         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7197                                                 stream * qm_ci_stream_off;
7198
7199         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7200                                         q->int_queue_len : HL_QUEUE_LENGTH;
7201
7202         hdev->asic_funcs->hw_queues_lock(hdev);
7203
7204         if (pr_sw_conf)
7205                 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7206
7207         ci = RREG32(pq_ci);
7208
7209         /* we should start printing from ci - 1 */
7210         ci = gaudi_queue_idx_dec(ci, queue_len);
7211
7212         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7213                 struct hl_bd *bd;
7214                 u64 addr;
7215                 u32 len;
7216
7217                 bd = q->kernel_address;
7218                 bd += ci;
7219
7220                 len = le32_to_cpu(bd->len);
7221                 /* len 0 means an uninitialized entry - break */
7222                 if (!len)
7223                         break;
7224
7225                 addr = le64_to_cpu(bd->ptr);
7226
7227                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7228                                                         stream, ci, addr, len);
7229
7230                 /* get previous ci, wrap if needed */
7231                 ci = gaudi_queue_idx_dec(ci, queue_len);
7232         }
7233
7234         hdev->asic_funcs->hw_queues_unlock(hdev);
7235 }
7236
7237 /**
7238  * print_qman_data_on_err - extract QMAN data on error
7239  *
7240  * @hdev: pointer to the habanalabs device structure
7241  * @qid_base: first QID of the QMAN (out of 4 streams)
7242  * @stream: the QMAN's stream
7243  * @qman_base: base address of QMAN registers block
7244  *
7245  * This function attempts to extract as much data as possible on a QMAN error.
7246  * For an upper CP, print the SW config stream data and the last 8 PQEs.
7247  * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
7248  */
7249 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7250                                                 u32 stream, u64 qman_base)
7251 {
7252         u32 i;
7253
7254         if (stream != QMAN_STREAMS) {
7255                 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7256                                                                         true);
7257                 return;
7258         }
7259
7260         gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7261
7262         for (i = 0; i < QMAN_STREAMS; i++)
7263                 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7264                                                                         false);
7265 }
7266
7267 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7268                                           const char *qm_name,
7269                                           u64 qman_base,
7270                                           u32 qid_base)
7271 {
7272         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7273         u64 glbl_sts_addr, arb_err_addr;
7274         char reg_desc[32];
7275
7276         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7277         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7278
7279         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7280         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7281                 glbl_sts_clr_val = 0;
7282                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7283
7284                 if (!glbl_sts_val)
7285                         continue;
7286
7287                 if (i == QMAN_STREAMS)
7288                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7289                 else
7290                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7291
7292                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7293                         if (glbl_sts_val & BIT(j)) {
7294                                 dev_err_ratelimited(hdev->dev,
7295                                                 "%s %s. err cause: %s\n",
7296                                                 qm_name, reg_desc,
7297                                                 gaudi_qman_error_cause[j]);
7298                                 glbl_sts_clr_val |= BIT(j);
7299                         }
7300                 }
7301
7302                 /* Write 1 to clear errors */
7303                 if (!hdev->stop_on_err)
7304                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7305                 else
7306                         print_qman_data_on_err(hdev, qid_base, i, qman_base);
7307         }
7308
7309         arb_err_val = RREG32(arb_err_addr);
7310
7311         if (!arb_err_val)
7312                 return;
7313
7314         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7315                 if (arb_err_val & BIT(j)) {
7316                         dev_err_ratelimited(hdev->dev,
7317                                         "%s ARB_ERR. err cause: %s\n",
7318                                         qm_name,
7319                                         gaudi_qman_arb_error_cause[j]);
7320                 }
7321         }
7322 }
7323
7324 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7325                 struct hl_eq_sm_sei_data *sei_data)
7326 {
7327         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7328
7329         switch (sei_data->sei_cause) {
7330         case SM_SEI_SO_OVERFLOW:
7331                 dev_err(hdev->dev,
7332                         "SM %u SEI Error: SO %u overflow/underflow",
7333                         index, le32_to_cpu(sei_data->sei_log));
7334                 break;
7335         case SM_SEI_LBW_4B_UNALIGNED:
7336                 dev_err(hdev->dev,
7337                         "SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7338                         index, le32_to_cpu(sei_data->sei_log));
7339                 break;
7340         case SM_SEI_AXI_RESPONSE_ERR:
7341                 dev_err(hdev->dev,
7342                         "SM %u SEI Error: AXI ID %u response error",
7343                         index, le32_to_cpu(sei_data->sei_log));
7344                 break;
7345         default:
7346                 dev_err(hdev->dev, "Unknown SM SEI cause %u",
7347                                 le32_to_cpu(sei_data->sei_log));
7348                 break;
7349         }
7350 }
7351
7352 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7353                 struct hl_eq_ecc_data *ecc_data)
7354 {
7355         struct ecc_info_extract_params params;
7356         u64 ecc_address = 0, ecc_syndrom = 0;
7357         u8 index, memory_wrapper_idx = 0;
7358         bool extract_info_from_fw;
7359         int rc;
7360
7361         switch (event_type) {
7362         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7363         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7364                 extract_info_from_fw = true;
7365                 break;
7366         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7367                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7368                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7369                 params.num_memories = 90;
7370                 params.derr = false;
7371                 params.disable_clock_gating = true;
7372                 extract_info_from_fw = false;
7373                 break;
7374         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7375                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7376                 params.block_address =
7377                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7378                 params.num_memories = 90;
7379                 params.derr = true;
7380                 params.disable_clock_gating = true;
7381                 extract_info_from_fw = false;
7382                 break;
7383         case GAUDI_EVENT_MME0_ACC_SERR:
7384         case GAUDI_EVENT_MME1_ACC_SERR:
7385         case GAUDI_EVENT_MME2_ACC_SERR:
7386         case GAUDI_EVENT_MME3_ACC_SERR:
7387                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7388                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7389                 params.num_memories = 128;
7390                 params.derr = false;
7391                 params.disable_clock_gating = true;
7392                 extract_info_from_fw = false;
7393                 break;
7394         case GAUDI_EVENT_MME0_ACC_DERR:
7395         case GAUDI_EVENT_MME1_ACC_DERR:
7396         case GAUDI_EVENT_MME2_ACC_DERR:
7397         case GAUDI_EVENT_MME3_ACC_DERR:
7398                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7399                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7400                 params.num_memories = 128;
7401                 params.derr = true;
7402                 params.disable_clock_gating = true;
7403                 extract_info_from_fw = false;
7404                 break;
7405         case GAUDI_EVENT_MME0_SBAB_SERR:
7406         case GAUDI_EVENT_MME1_SBAB_SERR:
7407         case GAUDI_EVENT_MME2_SBAB_SERR:
7408         case GAUDI_EVENT_MME3_SBAB_SERR:
7409                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7410                 params.block_address =
7411                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7412                 params.num_memories = 33;
7413                 params.derr = false;
7414                 params.disable_clock_gating = true;
7415                 extract_info_from_fw = false;
7416                 break;
7417         case GAUDI_EVENT_MME0_SBAB_DERR:
7418         case GAUDI_EVENT_MME1_SBAB_DERR:
7419         case GAUDI_EVENT_MME2_SBAB_DERR:
7420         case GAUDI_EVENT_MME3_SBAB_DERR:
7421                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7422                 params.block_address =
7423                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7424                 params.num_memories = 33;
7425                 params.derr = true;
7426                 params.disable_clock_gating = true;
7427                 extract_info_from_fw = false;
7428                 break;
7429         default:
7430                 return;
7431         }
7432
7433         if (extract_info_from_fw) {
7434                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7435                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7436                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7437         } else {
7438                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7439                                 &ecc_syndrom, &memory_wrapper_idx);
7440                 if (rc)
7441                         return;
7442         }
7443
7444         dev_err(hdev->dev,
7445                 "ECC error detected. Address: %#llx. Syndrome: %#llx. Block id %u\n",
7446                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7447 }
7448
7449 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7450 {
7451         u64 qman_base;
7452         char desc[32];
7453         u32 qid_base;
7454         u8 index;
7455
7456         switch (event_type) {
7457         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7458                 index = event_type - GAUDI_EVENT_TPC0_QM;
7459                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7460                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7461                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7462                 break;
7463         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7464                 index = event_type - GAUDI_EVENT_MME0_QM;
7465                 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7466                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7467                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7468                 break;
7469         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7470                 index = event_type - GAUDI_EVENT_DMA0_QM;
7471                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7472                 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7473                 if (index > 1)
7474                         qid_base++;
7475                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7476                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7477                 break;
7478         case GAUDI_EVENT_NIC0_QM0:
7479                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7480                 qman_base = mmNIC0_QM0_BASE;
7481                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7482                 break;
7483         case GAUDI_EVENT_NIC0_QM1:
7484                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7485                 qman_base = mmNIC0_QM1_BASE;
7486                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7487                 break;
7488         case GAUDI_EVENT_NIC1_QM0:
7489                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7490                 qman_base = mmNIC1_QM0_BASE;
7491                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7492                 break;
7493         case GAUDI_EVENT_NIC1_QM1:
7494                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7495                 qman_base = mmNIC1_QM1_BASE;
7496                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7497                 break;
7498         case GAUDI_EVENT_NIC2_QM0:
7499                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7500                 qman_base = mmNIC2_QM0_BASE;
7501                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7502                 break;
7503         case GAUDI_EVENT_NIC2_QM1:
7504                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7505                 qman_base = mmNIC2_QM1_BASE;
7506                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7507                 break;
7508         case GAUDI_EVENT_NIC3_QM0:
7509                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7510                 qman_base = mmNIC3_QM0_BASE;
7511                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7512                 break;
7513         case GAUDI_EVENT_NIC3_QM1:
7514                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7515                 qman_base = mmNIC3_QM1_BASE;
7516                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7517                 break;
7518         case GAUDI_EVENT_NIC4_QM0:
7519                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7520                 qman_base = mmNIC4_QM0_BASE;
7521                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7522                 break;
7523         case GAUDI_EVENT_NIC4_QM1:
7524                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7525                 qman_base = mmNIC4_QM1_BASE;
7526                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7527                 break;
7528         default:
7529                 return;
7530         }
7531
7532         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7533 }
7534
7535 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7536                                         bool razwi)
7537 {
7538         char desc[64] = "";
7539
7540         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7541         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7542                 event_type, desc);
7543
7544         if (razwi) {
7545                 gaudi_print_razwi_info(hdev);
7546                 gaudi_print_mmu_error_info(hdev);
7547         }
7548 }
7549
7550 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7551                                         struct cpucp_pkt_sync_err *sync_err)
7552 {
7553         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7554
7555         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7556                         sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7557 }
7558
7559 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7560                                         struct hl_eq_fw_alive *fw_alive)
7561 {
7562         dev_err(hdev->dev,
7563                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7564                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7565                 "Minor" : "Critical", fw_alive->process_id,
7566                 fw_alive->thread_id, fw_alive->uptime_seconds);
7567 }
7568
7569 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7570 {
7571         struct gaudi_device *gaudi = hdev->asic_specific;
7572
7573         /* Unmask all IRQs since some could have been received
7574          * during the soft reset
7575          */
7576         return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7577 }
7578
7579 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7580                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7581 {
7582         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7583         int rc = 0;
7584
7585         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7586                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7587                 if (!hbm_ecc_data) {
7588                         dev_err(hdev->dev, "No FW ECC data");
7589                         return 0;
7590                 }
7591
7592                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7593                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7594                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7595                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7596                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7597                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7598                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7599                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7600                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7601                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7602                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7603                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7604                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7605                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7606
7607                 dev_err(hdev->dev,
7608                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7609                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7610                 dev_err(hdev->dev,
7611                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7612                         device, ch, hbm_ecc_data->first_addr, type,
7613                         hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7614                         hbm_ecc_data->dec_cnt);
7615                 return 0;
7616         }
7617
7618         if (hdev->asic_prop.fw_security_enabled) {
7619                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7620                 return 0;
7621         }
7622
7623         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7624         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7625                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7626                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7627                 if (val) {
7628                         rc = -EIO;
7629                         dev_err(hdev->dev,
7630                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7631                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7632                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7633                                 (val >> 4) & 0x1);
7634
7635                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7636                         dev_err(hdev->dev,
7637                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7638                                 device, ch * 2,
7639                                 RREG32(base + ch * 0x1000 + 0x064),
7640                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7641                                 (val2 & 0xFF0000) >> 16,
7642                                 (val2 & 0xFF000000) >> 24);
7643                 }
7644
7645                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7646                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7647                 if (val) {
7648                         rc = -EIO;
7649                         dev_err(hdev->dev,
7650                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7651                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7652                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7653                                 (val >> 4) & 0x1);
7654
7655                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7656                         dev_err(hdev->dev,
7657                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7658                                 device, ch * 2 + 1,
7659                                 RREG32(base + ch * 0x1000 + 0x074),
7660                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7661                                 (val2 & 0xFF0000) >> 16,
7662                                 (val2 & 0xFF000000) >> 24);
7663                 }
7664
7665                 /* Clear interrupts */
7666                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7667                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7668                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7669                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7670                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7671                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7672         }
7673
7674         val  = RREG32(base + 0x8F30);
7675         val2 = RREG32(base + 0x8F34);
7676         if (val | val2) {
7677                 rc = -EIO;
7678                 dev_err(hdev->dev,
7679                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7680                         device, val, val2);
7681         }
7682         val  = RREG32(base + 0x8F40);
7683         val2 = RREG32(base + 0x8F44);
7684         if (val | val2) {
7685                 rc = -EIO;
7686                 dev_err(hdev->dev,
7687                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7688                         device, val, val2);
7689         }
7690
7691         return rc;
7692 }
7693
7694 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7695 {
7696         switch (hbm_event_type) {
7697         case GAUDI_EVENT_HBM0_SPI_0:
7698         case GAUDI_EVENT_HBM0_SPI_1:
7699                 return 0;
7700         case GAUDI_EVENT_HBM1_SPI_0:
7701         case GAUDI_EVENT_HBM1_SPI_1:
7702                 return 1;
7703         case GAUDI_EVENT_HBM2_SPI_0:
7704         case GAUDI_EVENT_HBM2_SPI_1:
7705                 return 2;
7706         case GAUDI_EVENT_HBM3_SPI_0:
7707         case GAUDI_EVENT_HBM3_SPI_1:
7708                 return 3;
7709         default:
7710                 break;
7711         }
7712
7713         /* Should never happen */
7714         return 0;
7715 }
7716
7717 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7718                                         char *interrupt_name)
7719 {
7720         struct gaudi_device *gaudi = hdev->asic_specific;
7721         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7722         bool soft_reset_required = false;
7723
7724         /* Accessing the TPC_INTR_CAUSE registers requires disabling clock
7725          * gating, which cannot be done from CPU-CP; the driver must therefore
7726          * do it itself.
7727          */
7728
7729         mutex_lock(&gaudi->clk_gate_mutex);
7730
7731         hdev->asic_funcs->disable_clock_gating(hdev);
7732
7733         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7734                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7735
7736         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7737                 if (tpc_interrupts_cause & BIT(i)) {
7738                         dev_err_ratelimited(hdev->dev,
7739                                         "TPC%d_%s interrupt cause: %s\n",
7740                                         tpc_id, interrupt_name,
7741                                         gaudi_tpc_interrupts_cause[i]);
7742                         /* If this is QM error, we need to soft-reset */
7743                         if (i == 15)
7744                                 soft_reset_required = true;
7745                 }
7746
7747         /* Clear interrupts */
7748         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7749
7750         hdev->asic_funcs->set_clock_gating(hdev);
7751
7752         mutex_unlock(&gaudi->clk_gate_mutex);
7753
7754         return soft_reset_required;
7755 }
7756
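 /*
  * DEC event IDs are laid out two per TPC in the event table, so the TPC
  * index is the offset from TPC0_DEC divided by 2.
  */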
7757 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7758 {
7759         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7760 }
7761
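 /* KRN_ERR event IDs are spaced six apart per TPC, hence the division by 6 */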
7762 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7763 {
7764         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7765 }
7766
7767 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7768                                         u16 event_type)
7769 {
7770         switch (event_type) {
7771         case GAUDI_EVENT_FIX_POWER_ENV_S:
7772                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7773                 dev_info_ratelimited(hdev->dev,
7774                         "Clock throttling due to power consumption\n");
7775                 break;
7776
7777         case GAUDI_EVENT_FIX_POWER_ENV_E:
7778                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7779                 dev_info_ratelimited(hdev->dev,
7780                 "Power envelope is safe, back to optimal clock\n");
7781                 break;
7782
7783         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7784                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7785                 dev_info_ratelimited(hdev->dev,
7786                         "Clock throttling due to overheating\n");
7787                 break;
7788
7789         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7790                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7791                 dev_info_ratelimited(hdev->dev,
7792                 "Thermal envelope is safe, back to optimal clock\n");
7793                 break;
7794
7795         default:
7796                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7797                         event_type);
7798                 break;
7799         }
7800 }
7801
7802 static void gaudi_handle_eqe(struct hl_device *hdev,
7803                                 struct hl_eq_entry *eq_entry)
7804 {
7805         struct gaudi_device *gaudi = hdev->asic_specific;
7806         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7807         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7808                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7809         u8 cause;
7810         bool reset_required;
7811
7812         gaudi->events_stat[event_type]++;
7813         gaudi->events_stat_aggregate[event_type]++;
7814
7815         switch (event_type) {
7816         case GAUDI_EVENT_PCIE_CORE_DERR:
7817         case GAUDI_EVENT_PCIE_IF_DERR:
7818         case GAUDI_EVENT_PCIE_PHY_DERR:
7819         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7820         case GAUDI_EVENT_MME0_ACC_DERR:
7821         case GAUDI_EVENT_MME0_SBAB_DERR:
7822         case GAUDI_EVENT_MME1_ACC_DERR:
7823         case GAUDI_EVENT_MME1_SBAB_DERR:
7824         case GAUDI_EVENT_MME2_ACC_DERR:
7825         case GAUDI_EVENT_MME2_SBAB_DERR:
7826         case GAUDI_EVENT_MME3_ACC_DERR:
7827         case GAUDI_EVENT_MME3_SBAB_DERR:
7828         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7829                 fallthrough;
7830         case GAUDI_EVENT_CPU_IF_ECC_DERR:
7831         case GAUDI_EVENT_PSOC_MEM_DERR:
7832         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7833         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7834         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7835         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7836         case GAUDI_EVENT_MMU_DERR:
7837         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7838                 gaudi_print_irq_info(hdev, event_type, true);
7839                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7840                 goto reset_device;
7841
7842         case GAUDI_EVENT_GIC500:
7843         case GAUDI_EVENT_AXI_ECC:
7844         case GAUDI_EVENT_L2_RAM_ECC:
7845         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7846                 gaudi_print_irq_info(hdev, event_type, false);
7847                 goto reset_device;
7848
7849         case GAUDI_EVENT_HBM0_SPI_0:
7850         case GAUDI_EVENT_HBM1_SPI_0:
7851         case GAUDI_EVENT_HBM2_SPI_0:
7852         case GAUDI_EVENT_HBM3_SPI_0:
7853                 gaudi_print_irq_info(hdev, event_type, false);
7854                 gaudi_hbm_read_interrupts(hdev,
7855                                 gaudi_hbm_event_to_dev(event_type),
7856                                 &eq_entry->hbm_ecc_data);
7857                 goto reset_device;
7858
7859         case GAUDI_EVENT_HBM0_SPI_1:
7860         case GAUDI_EVENT_HBM1_SPI_1:
7861         case GAUDI_EVENT_HBM2_SPI_1:
7862         case GAUDI_EVENT_HBM3_SPI_1:
7863                 gaudi_print_irq_info(hdev, event_type, false);
7864                 gaudi_hbm_read_interrupts(hdev,
7865                                 gaudi_hbm_event_to_dev(event_type),
7866                                 &eq_entry->hbm_ecc_data);
7867                 hl_fw_unmask_irq(hdev, event_type);
7868                 break;
7869
7870         case GAUDI_EVENT_TPC0_DEC:
7871         case GAUDI_EVENT_TPC1_DEC:
7872         case GAUDI_EVENT_TPC2_DEC:
7873         case GAUDI_EVENT_TPC3_DEC:
7874         case GAUDI_EVENT_TPC4_DEC:
7875         case GAUDI_EVENT_TPC5_DEC:
7876         case GAUDI_EVENT_TPC6_DEC:
7877         case GAUDI_EVENT_TPC7_DEC:
7878                 gaudi_print_irq_info(hdev, event_type, true);
7879                 reset_required = gaudi_tpc_read_interrupts(hdev,
7880                                         tpc_dec_event_to_tpc_id(event_type),
7881                                         "AXI_SLV_DEC_Error");
7882                 if (reset_required) {
7883                         dev_err(hdev->dev, "hard reset required due to %s\n",
7884                                 gaudi_irq_map_table[event_type].name);
7885
7886                         goto reset_device;
7887                 } else {
7888                         hl_fw_unmask_irq(hdev, event_type);
7889                 }
7890                 break;
7891
7892         case GAUDI_EVENT_TPC0_KRN_ERR:
7893         case GAUDI_EVENT_TPC1_KRN_ERR:
7894         case GAUDI_EVENT_TPC2_KRN_ERR:
7895         case GAUDI_EVENT_TPC3_KRN_ERR:
7896         case GAUDI_EVENT_TPC4_KRN_ERR:
7897         case GAUDI_EVENT_TPC5_KRN_ERR:
7898         case GAUDI_EVENT_TPC6_KRN_ERR:
7899         case GAUDI_EVENT_TPC7_KRN_ERR:
7900                 gaudi_print_irq_info(hdev, event_type, true);
7901                 reset_required = gaudi_tpc_read_interrupts(hdev,
7902                                         tpc_krn_event_to_tpc_id(event_type),
7903                                         "KRN_ERR");
7904                 if (reset_required) {
7905                         dev_err(hdev->dev, "hard reset required due to %s\n",
7906                                 gaudi_irq_map_table[event_type].name);
7907
7908                         goto reset_device;
7909                 } else {
7910                         hl_fw_unmask_irq(hdev, event_type);
7911                 }
7912                 break;
7913
7914         case GAUDI_EVENT_PCIE_CORE_SERR:
7915         case GAUDI_EVENT_PCIE_IF_SERR:
7916         case GAUDI_EVENT_PCIE_PHY_SERR:
7917         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7918         case GAUDI_EVENT_MME0_ACC_SERR:
7919         case GAUDI_EVENT_MME0_SBAB_SERR:
7920         case GAUDI_EVENT_MME1_ACC_SERR:
7921         case GAUDI_EVENT_MME1_SBAB_SERR:
7922         case GAUDI_EVENT_MME2_ACC_SERR:
7923         case GAUDI_EVENT_MME2_SBAB_SERR:
7924         case GAUDI_EVENT_MME3_ACC_SERR:
7925         case GAUDI_EVENT_MME3_SBAB_SERR:
7926         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7927         case GAUDI_EVENT_CPU_IF_ECC_SERR:
7928         case GAUDI_EVENT_PSOC_MEM_SERR:
7929         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7930         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7931         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7932         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7933                 fallthrough;
7934         case GAUDI_EVENT_MMU_SERR:
7935                 gaudi_print_irq_info(hdev, event_type, true);
7936                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7937                 hl_fw_unmask_irq(hdev, event_type);
7938                 break;
7939
7940         case GAUDI_EVENT_PCIE_DEC:
7941         case GAUDI_EVENT_MME0_WBC_RSP:
7942         case GAUDI_EVENT_MME0_SBAB0_RSP:
7943         case GAUDI_EVENT_MME1_WBC_RSP:
7944         case GAUDI_EVENT_MME1_SBAB0_RSP:
7945         case GAUDI_EVENT_MME2_WBC_RSP:
7946         case GAUDI_EVENT_MME2_SBAB0_RSP:
7947         case GAUDI_EVENT_MME3_WBC_RSP:
7948         case GAUDI_EVENT_MME3_SBAB0_RSP:
7949         case GAUDI_EVENT_CPU_AXI_SPLITTER:
7950         case GAUDI_EVENT_PSOC_AXI_DEC:
7951         case GAUDI_EVENT_PSOC_PRSTN_FALL:
7952         case GAUDI_EVENT_MMU_PAGE_FAULT:
7953         case GAUDI_EVENT_MMU_WR_PERM:
7954         case GAUDI_EVENT_RAZWI_OR_ADC:
7955         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7956         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7957         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7958                 fallthrough;
7959         case GAUDI_EVENT_NIC0_QM0:
7960         case GAUDI_EVENT_NIC0_QM1:
7961         case GAUDI_EVENT_NIC1_QM0:
7962         case GAUDI_EVENT_NIC1_QM1:
7963         case GAUDI_EVENT_NIC2_QM0:
7964         case GAUDI_EVENT_NIC2_QM1:
7965         case GAUDI_EVENT_NIC3_QM0:
7966         case GAUDI_EVENT_NIC3_QM1:
7967         case GAUDI_EVENT_NIC4_QM0:
7968         case GAUDI_EVENT_NIC4_QM1:
7969         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7970                 gaudi_print_irq_info(hdev, event_type, true);
7971                 gaudi_handle_qman_err(hdev, event_type);
7972                 hl_fw_unmask_irq(hdev, event_type);
7973                 break;
7974
7975         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7976                 gaudi_print_irq_info(hdev, event_type, true);
7977                 goto reset_device;
7978
7979         case GAUDI_EVENT_TPC0_BMON_SPMU:
7980         case GAUDI_EVENT_TPC1_BMON_SPMU:
7981         case GAUDI_EVENT_TPC2_BMON_SPMU:
7982         case GAUDI_EVENT_TPC3_BMON_SPMU:
7983         case GAUDI_EVENT_TPC4_BMON_SPMU:
7984         case GAUDI_EVENT_TPC5_BMON_SPMU:
7985         case GAUDI_EVENT_TPC6_BMON_SPMU:
7986         case GAUDI_EVENT_TPC7_BMON_SPMU:
7987         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7988                 gaudi_print_irq_info(hdev, event_type, false);
7989                 hl_fw_unmask_irq(hdev, event_type);
7990                 break;
7991
7992         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7993                 gaudi_print_irq_info(hdev, event_type, false);
7994                 gaudi_print_sm_sei_info(hdev, event_type,
7995                                         &eq_entry->sm_sei_data);
7996                 hl_fw_unmask_irq(hdev, event_type);
7997                 break;
7998
7999         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8000                 gaudi_print_clk_change_info(hdev, event_type);
8001                 hl_fw_unmask_irq(hdev, event_type);
8002                 break;
8003
8004         case GAUDI_EVENT_PSOC_GPIO_U16_0:
8005                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8006                 dev_err(hdev->dev,
8007                         "Received high temp H/W interrupt %d (cause %d)\n",
8008                         event_type, cause);
8009                 break;
8010
8011         case GAUDI_EVENT_DEV_RESET_REQ:
8012                 gaudi_print_irq_info(hdev, event_type, false);
8013                 goto reset_device;
8014
8015         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8016                 gaudi_print_irq_info(hdev, event_type, false);
8017                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8018                 goto reset_device;
8019
8020         case GAUDI_EVENT_FW_ALIVE_S:
8021                 gaudi_print_irq_info(hdev, event_type, false);
8022                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8023                 goto reset_device;
8024
8025         default:
8026                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8027                                 event_type);
8028                 break;
8029         }
8030
8031         return;
8032
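 /*
  * Events that jump here require a device reset. A hard reset is issued only
  * when the driver is configured to reset on FW events; otherwise the
  * interrupt is simply unmasked so the FW can keep reporting it.
  */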
8033 reset_device:
8034         if (hdev->hard_reset_on_fw_events)
8035                 hl_device_reset(hdev, HL_RESET_HARD);
8036         else
8037                 hl_fw_unmask_irq(hdev, event_type);
8038 }
8039
8040 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8041                                         u32 *size)
8042 {
8043         struct gaudi_device *gaudi = hdev->asic_specific;
8044
8045         if (aggregate) {
8046                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8047                 return gaudi->events_stat_aggregate;
8048         }
8049
8050         *size = (u32) sizeof(gaudi->events_stat);
8051         return gaudi->events_stat;
8052 }
8053
8054 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8055                                         u32 flags)
8056 {
8057         struct gaudi_device *gaudi = hdev->asic_specific;
8058         u32 status, timeout_usec;
8059         int rc;
8060
8061         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8062                 hdev->hard_reset_pending)
8063                 return 0;
8064
8065         if (hdev->pldm)
8066                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8067         else
8068                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8069
8070         /* L0 & L1 invalidation */
8071         WREG32(mmSTLB_INV_PS, 3);
8072         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8073         WREG32(mmSTLB_INV_PS, 2);
8074
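                 /* Wait for the STLB to report that the invalidation has completed */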
8075         rc = hl_poll_timeout(
8076                 hdev,
8077                 mmSTLB_INV_PS,
8078                 status,
8079                 !status,
8080                 1000,
8081                 timeout_usec);
8082
8083         WREG32(mmSTLB_INV_SET, 0);
8084
8085         if (rc) {
8086                 dev_err_ratelimited(hdev->dev,
8087                                         "MMU cache invalidation timeout\n");
8088                 hl_device_reset(hdev, HL_RESET_HARD);
8089         }
8090
8091         return rc;
8092 }
8093
8094 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8095                                                 bool is_hard, u32 flags,
8096                                                 u32 asid, u64 va, u64 size)
8097 {
8098         /* Treat as invalidate all because there is no range invalidation
8099          * in Gaudi
8100          */
8101         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8102 }
8103
8104 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8105                                         u32 asid, u64 phys_addr)
8106 {
8107         u32 status, timeout_usec;
8108         int rc;
8109
8110         if (hdev->pldm)
8111                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8112         else
8113                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8114
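                 /*
                  * Program the hop0 page-table physical address for this ASID and kick
                  * the MMU; completion is indicated by the busy bit clearing.
                  */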
8115         WREG32(MMU_ASID, asid);
8116         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8117         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8118         WREG32(MMU_BUSY, 0x80000000);
8119
8120         rc = hl_poll_timeout(
8121                 hdev,
8122                 MMU_BUSY,
8123                 status,
8124                 !(status & 0x80000000),
8125                 1000,
8126                 timeout_usec);
8127
8128         if (rc) {
8129                 dev_err(hdev->dev,
8130                         "Timeout during MMU hop0 config of asid %d\n", asid);
8131                 return rc;
8132         }
8133
8134         return 0;
8135 }
8136
8137 static int gaudi_send_heartbeat(struct hl_device *hdev)
8138 {
8139         struct gaudi_device *gaudi = hdev->asic_specific;
8140
8141         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8142                 return 0;
8143
8144         return hl_fw_send_heartbeat(hdev);
8145 }
8146
8147 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8148 {
8149         struct gaudi_device *gaudi = hdev->asic_specific;
8150         struct asic_fixed_properties *prop = &hdev->asic_prop;
8151         int rc;
8152
8153         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8154                 return 0;
8155
8156         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8157                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8158                                         mmCPU_BOOT_ERR1);
8159         if (rc)
8160                 return rc;
8161
8162         if (!strlen(prop->cpucp_info.card_name))
8163                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8164                                 CARD_NAME_MAX_LEN);
8165
8166         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8167
8168         set_default_power_values(hdev);
8169
8170         hdev->max_power = prop->max_power_default;
8171
8172         return 0;
8173 }
8174
8175 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8176                                         u8 mask_len, struct seq_file *s)
8177 {
8178         struct gaudi_device *gaudi = hdev->asic_specific;
8179         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8180         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8181         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8182         unsigned long *mask = (unsigned long *)mask_arr;
8183         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8184         bool is_idle = true, is_eng_idle, is_slave;
8185         u64 offset;
8186         int i, dma_id, port;
8187
8188         mutex_lock(&gaudi->clk_gate_mutex);
8189
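                 /* Disable clock gating so the engines' status registers can be sampled */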
8190         hdev->asic_funcs->disable_clock_gating(hdev);
8191
8192         if (s)
8193                 seq_puts(s,
8194                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8195                         "---  -------  ------------  ----------  -------------\n");
8196
8197         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8198                 dma_id = gaudi_dma_assignment[i];
8199                 offset = dma_id * DMA_QMAN_OFFSET;
8200
8201                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8202                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8203                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8204                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8205                                 IS_DMA_IDLE(dma_core_sts0);
8206                 is_idle &= is_eng_idle;
8207
8208                 if (mask && !is_eng_idle)
8209                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8210                 if (s)
8211                         seq_printf(s, fmt, dma_id,
8212                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8213                                 qm_cgm_sts, dma_core_sts0);
8214         }
8215
8216         if (s)
8217                 seq_puts(s,
8218                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8219                         "---  -------  ------------  ----------  ----------\n");
8220
8221         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8222                 offset = i * TPC_QMAN_OFFSET;
8223                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8224                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8225                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8226                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8227                                 IS_TPC_IDLE(tpc_cfg_sts);
8228                 is_idle &= is_eng_idle;
8229
8230                 if (mask && !is_eng_idle)
8231                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8232                 if (s)
8233                         seq_printf(s, fmt, i,
8234                                 is_eng_idle ? "Y" : "N",
8235                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8236         }
8237
8238         if (s)
8239                 seq_puts(s,
8240                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8241                         "---  -------  ------------  ----------  -----------\n");
8242
8243         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8244                 offset = i * MME_QMAN_OFFSET;
8245                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8246                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8247
8248                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8249                 is_slave = i % 2;
8250                 if (!is_slave) {
8251                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8252                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8253                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8254                 }
8255
8256                 is_idle &= is_eng_idle;
8257
8258                 if (mask && !is_eng_idle)
8259                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8260                 if (s) {
8261                         if (!is_slave)
8262                                 seq_printf(s, fmt, i,
8263                                         is_eng_idle ? "Y" : "N",
8264                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8265                         else
8266                                 seq_printf(s, mme_slave_fmt, i,
8267                                         is_eng_idle ? "Y" : "N", "-",
8268                                         "-", mme_arch_sts);
8269                 }
8270         }
8271
8272         if (s)
8273                 seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8274                                 "---  -------  ------------  ----------\n");
8275
8276         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8277                 offset = i * NIC_MACRO_QMAN_OFFSET;
8278                 port = 2 * i;
8279                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8280                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8281                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8282                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8283                         is_idle &= is_eng_idle;
8284
8285                         if (mask && !is_eng_idle)
8286                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8287                         if (s)
8288                                 seq_printf(s, nic_fmt, port,
8289                                                 is_eng_idle ? "Y" : "N",
8290                                                 qm_glbl_sts0, qm_cgm_sts);
8291                 }
8292
8293                 port = 2 * i + 1;
8294                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8295                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8296                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8297                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8298                         is_idle &= is_eng_idle;
8299
8300                         if (mask && !is_eng_idle)
8301                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8302                         if (s)
8303                                 seq_printf(s, nic_fmt, port,
8304                                                 is_eng_idle ? "Y" : "N",
8305                                                 qm_glbl_sts0, qm_cgm_sts);
8306                 }
8307         }
8308
8309         if (s)
8310                 seq_puts(s, "\n");
8311
8312         hdev->asic_funcs->set_clock_gating(hdev);
8313
8314         mutex_unlock(&gaudi->clk_gate_mutex);
8315
8316         return is_idle;
8317 }
8318
8319 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8320         __acquires(&gaudi->hw_queues_lock)
8321 {
8322         struct gaudi_device *gaudi = hdev->asic_specific;
8323
8324         spin_lock(&gaudi->hw_queues_lock);
8325 }
8326
8327 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8328         __releases(&gaudi->hw_queues_lock)
8329 {
8330         struct gaudi_device *gaudi = hdev->asic_specific;
8331
8332         spin_unlock(&gaudi->hw_queues_lock);
8333 }
8334
8335 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8336 {
8337         return hdev->pdev->device;
8338 }
8339
8340 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8341                                 size_t max_size)
8342 {
8343         struct gaudi_device *gaudi = hdev->asic_specific;
8344
8345         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8346                 return 0;
8347
8348         return hl_fw_get_eeprom_data(hdev, data, max_size);
8349 }
8350
8351 /*
8352  * This function should be used only during initialization and/or after reset,
8353  * when there are no active users.
8354  */
8355 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8356                                 u32 tpc_id)
8357 {
8358         struct gaudi_device *gaudi = hdev->asic_specific;
8359         u64 kernel_timeout;
8360         u32 status, offset;
8361         int rc;
8362
8363         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8364
8365         if (hdev->pldm)
8366                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8367         else
8368                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8369
8370         mutex_lock(&gaudi->clk_gate_mutex);
8371
8372         hdev->asic_funcs->disable_clock_gating(hdev);
8373
8374         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8375                         lower_32_bits(tpc_kernel));
8376         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8377                         upper_32_bits(tpc_kernel));
8378
8379         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8380                         lower_32_bits(tpc_kernel));
8381         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8382                         upper_32_bits(tpc_kernel));
8383         /* set a valid LUT pointer, content is of no significance */
8384         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8385                         lower_32_bits(tpc_kernel));
8386         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8387                         upper_32_bits(tpc_kernel));
8388
8389         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8390                         lower_32_bits(CFG_BASE +
8391                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8392
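                 /* Invalidate the TPC icache and prefetch the first 64KB of the kernel */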
8393         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8394                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8395                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8396         /* wait a bit for the engine to start executing */
8397         usleep_range(1000, 1500);
8398
8399         /* wait until engine has finished executing */
8400         rc = hl_poll_timeout(
8401                 hdev,
8402                 mmTPC0_CFG_STATUS + offset,
8403                 status,
8404                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8405                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8406                 1000,
8407                 kernel_timeout);
8408
8409         if (rc) {
8410                 dev_err(hdev->dev,
8411                         "Timeout while waiting for TPC%d icache prefetch\n",
8412                         tpc_id);
8413                 hdev->asic_funcs->set_clock_gating(hdev);
8414                 mutex_unlock(&gaudi->clk_gate_mutex);
8415                 return -EIO;
8416         }
8417
8418         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8419                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8420
8421         /* wait a bit for the engine to start executing */
8422         usleep_range(1000, 1500);
8423
8424         /* wait until engine has finished executing */
8425         rc = hl_poll_timeout(
8426                 hdev,
8427                 mmTPC0_CFG_STATUS + offset,
8428                 status,
8429                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8430                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8431                 1000,
8432                 kernel_timeout);
8433
8434         if (rc) {
8435                 dev_err(hdev->dev,
8436                         "Timeout while waiting for TPC%d vector pipe\n",
8437                         tpc_id);
8438                 hdev->asic_funcs->set_clock_gating(hdev);
8439                 mutex_unlock(&gaudi->clk_gate_mutex);
8440                 return -EIO;
8441         }
8442
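                 /* Wait for the TPC work-queue in-flight counter to drop to zero, i.e. the kernel finished */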
8443         rc = hl_poll_timeout(
8444                 hdev,
8445                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8446                 status,
8447                 (status == 0),
8448                 1000,
8449                 kernel_timeout);
8450
8451         hdev->asic_funcs->set_clock_gating(hdev);
8452         mutex_unlock(&gaudi->clk_gate_mutex);
8453
8454         if (rc) {
8455                 dev_err(hdev->dev,
8456                         "Timeout while waiting for TPC%d kernel to execute\n",
8457                         tpc_id);
8458                 return -EIO;
8459         }
8460
8461         return 0;
8462 }
8463
8464 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8465                 struct hl_ctx *ctx)
8466 {
8467         struct gaudi_device *gaudi = hdev->asic_specific;
8468         int min_alloc_order, rc, collective_cb_size;
8469
8470         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8471                 return 0;
8472
8473         hdev->internal_cb_pool_virt_addr =
8474                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8475                                         HOST_SPACE_INTERNAL_CB_SZ,
8476                                         &hdev->internal_cb_pool_dma_addr,
8477                                         GFP_KERNEL | __GFP_ZERO);
8478
8479         if (!hdev->internal_cb_pool_virt_addr)
8480                 return -ENOMEM;
8481
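                 /*
                  * The pool's minimum allocation order is sized for a collective CB of
                  * five MSG_SHORT packets plus a FENCE packet.
                  */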
8482         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8483                         sizeof(struct packet_fence);
8484         min_alloc_order = ilog2(collective_cb_size);
8485
8486         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8487         if (!hdev->internal_cb_pool) {
8488                 dev_err(hdev->dev,
8489                         "Failed to create internal CB pool\n");
8490                 rc = -ENOMEM;
8491                 goto free_internal_cb_pool;
8492         }
8493
8494         rc = gen_pool_add(hdev->internal_cb_pool,
8495                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8496                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8497         if (rc) {
8498                 dev_err(hdev->dev,
8499                         "Failed to add memory to internal CB pool\n");
8500                 rc = -EFAULT;
8501                 goto destroy_internal_cb_pool;
8502         }
8503
8504         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8505                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8506                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8507
8508         if (!hdev->internal_cb_va_base) {
8509                 rc = -ENOMEM;
8510                 goto destroy_internal_cb_pool;
8511         }
8512
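                 /* Map the pool's DMA address to the reserved host VA block in the device MMU */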
8513         mutex_lock(&ctx->mmu_lock);
8514         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8515                         hdev->internal_cb_pool_dma_addr,
8516                         HOST_SPACE_INTERNAL_CB_SZ);
8517
8518         hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
8519         mutex_unlock(&ctx->mmu_lock);
8520
8521         if (rc)
8522                 goto unreserve_internal_cb_pool;
8523
8524         return 0;
8525
8526 unreserve_internal_cb_pool:
8527         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8528                         HOST_SPACE_INTERNAL_CB_SZ);
8529 destroy_internal_cb_pool:
8530         gen_pool_destroy(hdev->internal_cb_pool);
8531 free_internal_cb_pool:
8532         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8533                         HOST_SPACE_INTERNAL_CB_SZ,
8534                         hdev->internal_cb_pool_virt_addr,
8535                         hdev->internal_cb_pool_dma_addr);
8536
8537         return rc;
8538 }
8539
8540 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8541                 struct hl_ctx *ctx)
8542 {
8543         struct gaudi_device *gaudi = hdev->asic_specific;
8544
8545         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8546                 return;
8547
8548         mutex_lock(&ctx->mmu_lock);
8549         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8550                         HOST_SPACE_INTERNAL_CB_SZ);
8551         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8552                         HOST_SPACE_INTERNAL_CB_SZ);
8553         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
8554         mutex_unlock(&ctx->mmu_lock);
8555
8556         gen_pool_destroy(hdev->internal_cb_pool);
8557
8558         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8559                         HOST_SPACE_INTERNAL_CB_SZ,
8560                         hdev->internal_cb_pool_virt_addr,
8561                         hdev->internal_cb_pool_dma_addr);
8562 }
8563
8564 static int gaudi_ctx_init(struct hl_ctx *ctx)
8565 {
8566         if (ctx->asid == HL_KERNEL_ASID_ID)
8567                 return 0;
8568
8569         gaudi_mmu_prepare(ctx->hdev, ctx->asid);
8570         return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8571 }
8572
8573 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8574 {
8575         if (ctx->asid == HL_KERNEL_ASID_ID)
8576                 return;
8577
8578         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8579 }
8580
8581 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8582 {
8583         return gaudi_cq_assignment[cq_idx];
8584 }
8585
8586 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8587 {
8588         return sizeof(struct packet_msg_short) +
8589                         sizeof(struct packet_msg_prot) * 2;
8590 }
8591
8592 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8593 {
8594         return sizeof(struct packet_msg_short) * 4 +
8595                         sizeof(struct packet_fence) +
8596                         sizeof(struct packet_msg_prot) * 2;
8597 }
8598
8599 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8600                                 u32 size, bool eb)
8601 {
8602         struct hl_cb *cb = (struct hl_cb *) data;
8603         struct packet_msg_short *pkt;
8604         u32 value, ctl, pkt_size = sizeof(*pkt);
8605
8606         pkt = cb->kernel_address + size;
8607         memset(pkt, 0, pkt_size);
8608
8609         /* Inc by 1, Mode ADD */
8610         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8611         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8612
8613         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8614         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8615         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8616         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8617         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8618         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8619         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8620
8621         pkt->value = cpu_to_le32(value);
8622         pkt->ctl = cpu_to_le32(ctl);
8623
8624         return size + pkt_size;
8625 }
8626
8627 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8628                                         u16 addr)
8629 {
8630         u32 ctl, pkt_size = sizeof(*pkt);
8631
8632         memset(pkt, 0, pkt_size);
8633
8634         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8635         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8636         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8637         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8638         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8639         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8640
8641         pkt->value = cpu_to_le32(value);
8642         pkt->ctl = cpu_to_le32(ctl);
8643
8644         return pkt_size;
8645 }
8646
8647 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8648                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8649                 u16 sob_val, u16 mon_id)
8650 {
8651         u64 monitor_base;
8652         u32 ctl, value, pkt_size = sizeof(*pkt);
8653         u16 msg_addr_offset;
8654         u8 mask;
8655
8656         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8657                 dev_err(hdev->dev,
8658                         "sob_base %u (mask %#x) is not valid\n",
8659                         sob_base, sob_mask);
8660                 return 0;
8661         }
8662
8663         /*
8664          * monitor_base should be the content of the base0 address registers,
8665          * so it will be added to the msg short offsets
8666          */
8667         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8668
8669         msg_addr_offset =
8670                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8671                                 monitor_base;
8672
8673         memset(pkt, 0, pkt_size);
8674
8675         /* Monitor config packet: bind the monitor to a sync object */
8676         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8677         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8678         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8679                         0); /* GREATER OR EQUAL */
8680         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8681
8682         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8683         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8684         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8685         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8686         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8687         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8688         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8689
8690         pkt->value = cpu_to_le32(value);
8691         pkt->ctl = cpu_to_le32(ctl);
8692
8693         return pkt_size;
8694 }
8695
8696 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8697 {
8698         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8699
8700         memset(pkt, 0, pkt_size);
8701
8702         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8703         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8704         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8705
8706         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8707         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8708         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8709         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8710
8711         pkt->cfg = cpu_to_le32(cfg);
8712         pkt->ctl = cpu_to_le32(ctl);
8713
8714         return pkt_size;
8715 }
8716
8717 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8718 {
8719         u32 offset, nic_index;
8720
8721         switch (queue_id) {
8722         case GAUDI_QUEUE_ID_DMA_0_0:
8723                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8724                 break;
8725         case GAUDI_QUEUE_ID_DMA_0_1:
8726                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8727                 break;
8728         case GAUDI_QUEUE_ID_DMA_0_2:
8729                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8730                 break;
8731         case GAUDI_QUEUE_ID_DMA_0_3:
8732                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8733                 break;
8734         case GAUDI_QUEUE_ID_DMA_1_0:
8735                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8736                 break;
8737         case GAUDI_QUEUE_ID_DMA_1_1:
8738                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8739                 break;
8740         case GAUDI_QUEUE_ID_DMA_1_2:
8741                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8742                 break;
8743         case GAUDI_QUEUE_ID_DMA_1_3:
8744                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8745                 break;
8746         case GAUDI_QUEUE_ID_DMA_5_0:
8747                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8748                 break;
8749         case GAUDI_QUEUE_ID_DMA_5_1:
8750                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8751                 break;
8752         case GAUDI_QUEUE_ID_DMA_5_2:
8753                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8754                 break;
8755         case GAUDI_QUEUE_ID_DMA_5_3:
8756                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8757                 break;
8758         case GAUDI_QUEUE_ID_TPC_7_0:
8759                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8760                 break;
8761         case GAUDI_QUEUE_ID_TPC_7_1:
8762                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8763                 break;
8764         case GAUDI_QUEUE_ID_TPC_7_2:
8765                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8766                 break;
8767         case GAUDI_QUEUE_ID_TPC_7_3:
8768                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8769                 break;
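                 /*
                  * NIC queues: each NIC macro hosts two QMANs with four queues each.
                  * Derive the NIC engine index from the queue ID, then select the macro
                  * (index >> 1) and the QMAN within it (index & 1).
                  */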
8770         case GAUDI_QUEUE_ID_NIC_0_0:
8771         case GAUDI_QUEUE_ID_NIC_1_0:
8772         case GAUDI_QUEUE_ID_NIC_2_0:
8773         case GAUDI_QUEUE_ID_NIC_3_0:
8774         case GAUDI_QUEUE_ID_NIC_4_0:
8775         case GAUDI_QUEUE_ID_NIC_5_0:
8776         case GAUDI_QUEUE_ID_NIC_6_0:
8777         case GAUDI_QUEUE_ID_NIC_7_0:
8778         case GAUDI_QUEUE_ID_NIC_8_0:
8779         case GAUDI_QUEUE_ID_NIC_9_0:
8780                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8781                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8782                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8783                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8784                 break;
8785         case GAUDI_QUEUE_ID_NIC_0_1:
8786         case GAUDI_QUEUE_ID_NIC_1_1:
8787         case GAUDI_QUEUE_ID_NIC_2_1:
8788         case GAUDI_QUEUE_ID_NIC_3_1:
8789         case GAUDI_QUEUE_ID_NIC_4_1:
8790         case GAUDI_QUEUE_ID_NIC_5_1:
8791         case GAUDI_QUEUE_ID_NIC_6_1:
8792         case GAUDI_QUEUE_ID_NIC_7_1:
8793         case GAUDI_QUEUE_ID_NIC_8_1:
8794         case GAUDI_QUEUE_ID_NIC_9_1:
8795                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8796                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8797                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8798                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8799                 break;
8800         case GAUDI_QUEUE_ID_NIC_0_2:
8801         case GAUDI_QUEUE_ID_NIC_1_2:
8802         case GAUDI_QUEUE_ID_NIC_2_2:
8803         case GAUDI_QUEUE_ID_NIC_3_2:
8804         case GAUDI_QUEUE_ID_NIC_4_2:
8805         case GAUDI_QUEUE_ID_NIC_5_2:
8806         case GAUDI_QUEUE_ID_NIC_6_2:
8807         case GAUDI_QUEUE_ID_NIC_7_2:
8808         case GAUDI_QUEUE_ID_NIC_8_2:
8809         case GAUDI_QUEUE_ID_NIC_9_2:
8810                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8811                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8812                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8813                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8814                 break;
8815         case GAUDI_QUEUE_ID_NIC_0_3:
8816         case GAUDI_QUEUE_ID_NIC_1_3:
8817         case GAUDI_QUEUE_ID_NIC_2_3:
8818         case GAUDI_QUEUE_ID_NIC_3_3:
8819         case GAUDI_QUEUE_ID_NIC_4_3:
8820         case GAUDI_QUEUE_ID_NIC_5_3:
8821         case GAUDI_QUEUE_ID_NIC_6_3:
8822         case GAUDI_QUEUE_ID_NIC_7_3:
8823         case GAUDI_QUEUE_ID_NIC_8_3:
8824         case GAUDI_QUEUE_ID_NIC_9_3:
8825                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8826                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8827                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8828                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8829                 break;
8830         default:
8831                 return -EINVAL;
8832         }
8833
8834         *addr = CFG_BASE + offset;
8835
8836         return 0;
8837 }
8838
8839 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8840 {
8841         u64 monitor_base;
8842         u32 size = 0;
8843         u16 msg_addr_offset;
8844
8845         /*
8846          * monitor_base should be the content of the base0 address registers,
8847          * so it will be added to the msg short offsets
8848          */
8849         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8850
8851         /* First monitor config packet: low address of the sync */
8852         msg_addr_offset =
8853                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8854                                 monitor_base;
8855
8856         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8857                                         msg_addr_offset);
8858
8859         /* Second monitor config packet: high address of the sync */
8860         msg_addr_offset =
8861                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8862                                 monitor_base;
8863
8864         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8865                                         msg_addr_offset);
8866
8867         /*
8868          * Third monitor config packet: the payload, i.e. what to write when the
8869          * sync triggers
8870          */
8871         msg_addr_offset =
8872                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8873                                 monitor_base;
8874
8875         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8876
8877         return size;
8878 }
8879
8880 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8881                                 struct hl_gen_wait_properties *prop)
8882 {
8883         struct hl_cb *cb = (struct hl_cb *) prop->data;
8884         void *buf = cb->kernel_address;
8885         u64 fence_addr = 0;
8886         u32 size = prop->size;
8887
8888         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8889                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8890                                 prop->q_idx);
8891                 return 0;
8892         }
8893
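                 /*
                  * Build the wait CB: point the monitor's payload at the queue's fence
                  * register, arm the monitor on the sync objects, then append a FENCE
                  * packet that stalls the queue until the monitor fires.
                  */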
8894         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8895         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8896                         prop->sob_mask, prop->sob_val, prop->mon_id);
8897         size += gaudi_add_fence_pkt(buf + size);
8898
8899         return size;
8900 }
8901
8902 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8903 {
8904         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8905         int rc;
8906
8907         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8908                 hw_sob->sob_id);
8909
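                 /* Clear the SOB through a QMAN register-memset job rather than a direct write */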
8910         rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
8911                         CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8912                         hw_sob->sob_id * 4, 1, 0);
8913         if (rc)
8914                 dev_err(hdev->dev, "failed resetting sob %u\n", hw_sob->sob_id);
8915
8916         kref_init(&hw_sob->kref);
8917 }
8918
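 /*
  * The boot FW leaves HL_POWER9_HOST_MAGIC in a non-reset scratch register on
  * POWER9 hosts that support 64-bit DMA; otherwise a 48-bit DMA mask is used.
  */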
8919 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
8920 {
8921         if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
8922                                                         HL_POWER9_HOST_MAGIC) {
8923                 hdev->power9_64bit_dma_enable = 1;
8924                 hdev->dma_mask = 64;
8925         } else {
8926                 hdev->power9_64bit_dma_enable = 0;
8927                 hdev->dma_mask = 48;
8928         }
8929 }
8930
8931 static u64 gaudi_get_device_time(struct hl_device *hdev)
8932 {
8933         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8934
8935         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8936 }
8937
8938 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8939                                 u32 *block_size, u32 *block_id)
8940 {
8941         return -EPERM;
8942 }
8943
8944 static int gaudi_block_mmap(struct hl_device *hdev,
8945                                 struct vm_area_struct *vma,
8946                                 u32 block_id, u32 block_size)
8947 {
8948         return -EPERM;
8949 }
8950
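 /*
  * Signal the FW, via the legacy GIC SETSPI register or the dynamic FW
  * register (as advertised), with the INTS_REGISTER event's CPU ID so it
  * starts delivering events to the driver.
  */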
8951 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8952 {
8953         struct cpu_dyn_regs *dyn_regs =
8954                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8955         u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8956                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8957                         le32_to_cpu(dyn_regs->gic_host_ints_irq);
8958
8959         WREG32(irq_handler_offset,
8960                 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8961 }
8962
8963 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8964 {
8965         switch (pll_idx) {
8966         case HL_GAUDI_CPU_PLL: return CPU_PLL;
8967         case HL_GAUDI_PCI_PLL: return PCI_PLL;
8968         case HL_GAUDI_NIC_PLL: return NIC_PLL;
8969         case HL_GAUDI_DMA_PLL: return DMA_PLL;
8970         case HL_GAUDI_MESH_PLL: return MESH_PLL;
8971         case HL_GAUDI_MME_PLL: return MME_PLL;
8972         case HL_GAUDI_TPC_PLL: return TPC_PLL;
8973         case HL_GAUDI_IF_PLL: return IF_PLL;
8974         case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8975         case HL_GAUDI_HBM_PLL: return HBM_PLL;
8976         default: return -EINVAL;
8977         }
8978 }
8979
8980 static const struct hl_asic_funcs gaudi_funcs = {
8981         .early_init = gaudi_early_init,
8982         .early_fini = gaudi_early_fini,
8983         .late_init = gaudi_late_init,
8984         .late_fini = gaudi_late_fini,
8985         .sw_init = gaudi_sw_init,
8986         .sw_fini = gaudi_sw_fini,
8987         .hw_init = gaudi_hw_init,
8988         .hw_fini = gaudi_hw_fini,
8989         .halt_engines = gaudi_halt_engines,
8990         .suspend = gaudi_suspend,
8991         .resume = gaudi_resume,
8992         .cb_mmap = gaudi_cb_mmap,
8993         .ring_doorbell = gaudi_ring_doorbell,
8994         .pqe_write = gaudi_pqe_write,
8995         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
8996         .asic_dma_free_coherent = gaudi_dma_free_coherent,
8997         .scrub_device_mem = gaudi_scrub_device_mem,
8998         .get_int_queue_base = gaudi_get_int_queue_base,
8999         .test_queues = gaudi_test_queues,
9000         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9001         .asic_dma_pool_free = gaudi_dma_pool_free,
9002         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9003         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9004         .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9005         .cs_parser = gaudi_cs_parser,
9006         .asic_dma_map_sg = gaudi_dma_map_sg,
9007         .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9008         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9009         .update_eq_ci = gaudi_update_eq_ci,
9010         .context_switch = gaudi_context_switch,
9011         .restore_phase_topology = gaudi_restore_phase_topology,
9012         .debugfs_read32 = gaudi_debugfs_read32,
9013         .debugfs_write32 = gaudi_debugfs_write32,
9014         .debugfs_read64 = gaudi_debugfs_read64,
9015         .debugfs_write64 = gaudi_debugfs_write64,
9016         .debugfs_read_dma = gaudi_debugfs_read_dma,
9017         .add_device_attr = gaudi_add_device_attr,
9018         .handle_eqe = gaudi_handle_eqe,
9019         .set_pll_profile = gaudi_set_pll_profile,
9020         .get_events_stat = gaudi_get_events_stat,
9021         .read_pte = gaudi_read_pte,
9022         .write_pte = gaudi_write_pte,
9023         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9024         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9025         .send_heartbeat = gaudi_send_heartbeat,
9026         .set_clock_gating = gaudi_set_clock_gating,
9027         .disable_clock_gating = gaudi_disable_clock_gating,
9028         .debug_coresight = gaudi_debug_coresight,
9029         .is_device_idle = gaudi_is_device_idle,
9030         .soft_reset_late_init = gaudi_soft_reset_late_init,
9031         .hw_queues_lock = gaudi_hw_queues_lock,
9032         .hw_queues_unlock = gaudi_hw_queues_unlock,
9033         .get_pci_id = gaudi_get_pci_id,
9034         .get_eeprom_data = gaudi_get_eeprom_data,
9035         .send_cpu_message = gaudi_send_cpu_message,
9036         .pci_bars_map = gaudi_pci_bars_map,
9037         .init_iatu = gaudi_init_iatu,
9038         .rreg = hl_rreg,
9039         .wreg = hl_wreg,
9040         .halt_coresight = gaudi_halt_coresight,
9041         .ctx_init = gaudi_ctx_init,
9042         .ctx_fini = gaudi_ctx_fini,
9043         .get_clk_rate = gaudi_get_clk_rate,
9044         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9045         .load_firmware_to_device = gaudi_load_firmware_to_device,
9046         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9047         .get_signal_cb_size = gaudi_get_signal_cb_size,
9048         .get_wait_cb_size = gaudi_get_wait_cb_size,
9049         .gen_signal_cb = gaudi_gen_signal_cb,
9050         .gen_wait_cb = gaudi_gen_wait_cb,
9051         .reset_sob = gaudi_reset_sob,
9052         .reset_sob_group = gaudi_reset_sob_group,
9053         .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9054         .get_device_time = gaudi_get_device_time,
9055         .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9056         .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9057         .scramble_addr = hl_mmu_scramble_addr,
9058         .descramble_addr = hl_mmu_descramble_addr,
9059         .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9060         .get_hw_block_id = gaudi_get_hw_block_id,
9061         .hw_block_mmap = gaudi_block_mmap,
9062         .enable_events_from_fw = gaudi_enable_events_from_fw,
9063         .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9064         .init_firmware_loader = gaudi_init_firmware_loader,
9065         .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm
9066 };
9067
9068 /**
9069  * gaudi_set_asic_funcs - set GAUDI function pointers
9070  *
9071  * @hdev: pointer to hl_device structure
9072  *
9073  */
9074 void gaudi_set_asic_funcs(struct hl_device *hdev)
9075 {
9076         hdev->asic_funcs = &gaudi_funcs;
9077 }