habanalabs: state dump monitors and fences infrastructure
drivers/misc/habanalabs/gaudi/gaudi.c
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is never
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */
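
/*
 * Illustrative sketch (not part of the driver) of the QMAN0 DMA flow the
 * scheme above describes; the names below are hypothetical pseudocode, not
 * driver symbols:
 *
 *     if (device_is_idle(hdev)) {
 *             secure_dma_channel(hdev, 0);     // raise DMA0 to secured
 *             run_dma_job(hdev, job);          // e.g. SRAM / MMU PGT clear
 *             unsecure_dma_channel(hdev, 0);   // restore non-secured mode
 *     }
 */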

#define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC        2000            /* 2000ms */
#define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000         /* 1s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC  15000000        /* 15s */

#define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9

#define GAUDI_MAX_STRING_LEN            20

#define GAUDI_CB_POOL_CB_CNT            512
#define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE     20

#define GAUDI_NUM_OF_QM_ERR_CAUSE       16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3

#define GAUDI_ARB_WDT_TIMEOUT           0x1000000

#define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
                BIT(GAUDI_ENGINE_ID_MME_0) |\
                BIT(GAUDI_ENGINE_ID_MME_2) |\
                GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
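
/*
 * Note on the debugfs clock-gating mask above: it covers MME 0, MME 2 and all
 * eight TPC engines. Only two MME bits appear, presumably because the other
 * MMEs are handled through these engines (an assumption, not stated here).
 * For illustration only: if GAUDI_ENGINE_ID_TPC_0..TPC_7 were bits 8..15, the
 * GENMASK_ULL() term would evaluate to 0xff00, OR-ed with the two MME bits.
 */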

#define HBM_SCRUBBING_TIMEOUT_US        1000000 /* 1s */

#define GAUDI_PLL_MAX 10

static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
                "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
                "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
                "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
                "gaudi cpu eq"
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
        [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
        [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
        [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
        [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
        [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
        [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
        [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
        [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
        [0] = GAUDI_QUEUE_ID_DMA_0_0,
        [1] = GAUDI_QUEUE_ID_DMA_0_1,
        [2] = GAUDI_QUEUE_ID_DMA_0_2,
        [3] = GAUDI_QUEUE_ID_DMA_0_3,
        [4] = GAUDI_QUEUE_ID_DMA_1_0,
        [5] = GAUDI_QUEUE_ID_DMA_1_1,
        [6] = GAUDI_QUEUE_ID_DMA_1_2,
        [7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
        [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
        [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
        [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
        [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
        [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
        [PACKET_REPEAT]         = sizeof(struct packet_repeat),
        [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
        [PACKET_FENCE]          = sizeof(struct packet_fence),
        [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
        [PACKET_NOP]            = sizeof(struct packet_nop),
        [PACKET_STOP]           = sizeof(struct packet_stop),
        [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
        [PACKET_WAIT]           = sizeof(struct packet_wait),
        [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
};

static inline bool validate_packet_id(enum packet_id id)
{
        switch (id) {
        case PACKET_WREG_32:
        case PACKET_WREG_BULK:
        case PACKET_MSG_LONG:
        case PACKET_MSG_SHORT:
        case PACKET_CP_DMA:
        case PACKET_REPEAT:
        case PACKET_MSG_PROT:
        case PACKET_FENCE:
        case PACKET_LIN_DMA:
        case PACKET_NOP:
        case PACKET_STOP:
        case PACKET_ARB_POINT:
        case PACKET_WAIT:
        case PACKET_LOAD_AND_EXE:
                return true;
        default:
                return false;
        }
}
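
/*
 * Illustrative helper, not used by the driver: a CB parser could combine
 * validate_packet_id() with gaudi_packet_sizes[] to walk a command buffer
 * packet by packet, rejecting opcodes that must not appear in a user CB.
 * This is a minimal sketch under that assumption.
 */
static inline u16 gaudi_packet_size_or_zero(enum packet_id id)
{
        /* validate_packet_id() also guards against out-of-range ids */
        return validate_packet_id(id) ? gaudi_packet_sizes[id] : 0;
}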

static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
        "tpc_address_exceed_slm",
        "tpc_div_by_0",
        "tpc_spu_mac_overflow",
        "tpc_spu_addsub_overflow",
        "tpc_spu_abs_overflow",
        "tpc_spu_fp_dst_nan_inf",
        "tpc_spu_fp_dst_denorm",
        "tpc_vpu_mac_overflow",
        "tpc_vpu_addsub_overflow",
        "tpc_vpu_abs_overflow",
        "tpc_vpu_fp_dst_nan_inf",
        "tpc_vpu_fp_dst_denorm",
        "tpc_assertions",
        "tpc_illegal_instruction",
        "tpc_pc_wrap_around",
        "tpc_qm_sw_err",
        "tpc_hbw_rresp_err",
        "tpc_hbw_bresp_err",
        "tpc_lbw_rresp_err",
        "tpc_lbw_bresp_err"
};

static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
        "PQ AXI HBW error",
        "CQ AXI HBW error",
        "CP AXI HBW error",
        "CP error due to undefined OPCODE",
        "CP encountered STOP OPCODE",
        "CP AXI LBW error",
        "CP WRREG32 or WRBULK returned error",
        "N/A",
        "FENCE 0 inc over max value and clipped",
        "FENCE 1 inc over max value and clipped",
        "FENCE 2 inc over max value and clipped",
        "FENCE 3 inc over max value and clipped",
        "FENCE 0 dec under min value and clipped",
        "FENCE 1 dec under min value and clipped",
        "FENCE 2 dec under min value and clipped",
        "FENCE 3 dec under min value and clipped"
};

static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
        "Choice push while full error",
        "Choice Q watchdog error",
        "MSG AXI LBW returned with error"
};

enum gaudi_sm_sei_cause {
        GAUDI_SM_SEI_SO_OVERFLOW,
        GAUDI_SM_SEI_LBW_4B_UNALIGNED,
        GAUDI_SM_SEI_AXI_RESPONSE_ERR
};

static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
        QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};

static s64 gaudi_state_dump_specs_props[SP_MAX] = {0};

struct ecc_info_extract_params {
        u64 block_address;
        u32 num_memories;
        bool derr;
        bool disable_clock_gating;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
                                                                u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
                                        struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
                                        u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
                                        u32 num_regs, u32 val);
static int gaudi_schedule_register_memset(struct hl_device *hdev,
                u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
                                u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
                                u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
                                struct hl_gen_wait_properties *prop);

static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
        if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
                return HL_COLLECTIVE_MASTER;

        if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
                        queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
                return HL_COLLECTIVE_SLAVE;

        if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
                        queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
                return HL_COLLECTIVE_SLAVE;

        if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
                        queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
                return HL_COLLECTIVE_SLAVE;

        return HL_COLLECTIVE_NOT_SUPPORTED;
}
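
/*
 * Summary of the mapping above: the external (host-submission) queues act as
 * collective masters, while the DMA5, TPC7 and NIC queues act as collective
 * slaves; any other queue does not take part in collective operations.
 */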

static inline void set_default_power_values(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;

        if (hdev->card_type == cpucp_card_type_pmc) {
                prop->max_power_default = MAX_POWER_DEFAULT_PMC;
                prop->dc_power_default = DC_POWER_DEFAULT_PMC;
        } else {
                prop->max_power_default = MAX_POWER_DEFAULT_PCI;
                prop->dc_power_default = DC_POWER_DEFAULT_PCI;
        }
}

static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        u32 num_sync_stream_queues = 0;
        int i;

        prop->max_queues = GAUDI_QUEUE_ID_SIZE;
        prop->hw_queues_props = kcalloc(prop->max_queues,
                        sizeof(struct hw_queue_properties),
                        GFP_KERNEL);

        if (!prop->hw_queues_props)
                return -ENOMEM;

        for (i = 0 ; i < prop->max_queues ; i++) {
                if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
                        prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
                        prop->hw_queues_props[i].driver_only = 0;
                        prop->hw_queues_props[i].supports_sync_stream = 1;
                        prop->hw_queues_props[i].cb_alloc_flags =
                                CB_ALLOC_KERNEL;
                        num_sync_stream_queues++;
                } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
                        prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
                        prop->hw_queues_props[i].driver_only = 1;
                        prop->hw_queues_props[i].supports_sync_stream = 0;
                        prop->hw_queues_props[i].cb_alloc_flags =
                                CB_ALLOC_KERNEL;
                } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
                        prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
                        prop->hw_queues_props[i].driver_only = 0;
                        prop->hw_queues_props[i].supports_sync_stream = 0;
                        prop->hw_queues_props[i].cb_alloc_flags =
                                CB_ALLOC_USER;
                }
                prop->hw_queues_props[i].collective_mode =
                                                get_collective_mode(hdev, i);
        }

        prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
        prop->collective_first_sob = 0;
        prop->collective_first_mon = 0;

        /* 2 SOBs per internal queue stream are reserved for collective */
        prop->sync_stream_first_sob =
                        ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
                        * QMAN_STREAMS * HL_RSVD_SOBS;

        /* 1 monitor per internal queue stream is reserved for collective
         * 2 monitors per external queue stream are reserved for collective
         */
        prop->sync_stream_first_mon =
                        (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
                        (NUMBER_OF_EXT_HW_QUEUES * 2);
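
        /*
         * Worked example for the SOB reservation above (the numbers are
         * illustrative assumptions, the real values live in the gaudi
         * headers): if NUMBER_OF_SOBS_IN_GRP were 11 and
         * HL_MAX_SOBS_PER_MONITOR were 8, ALIGN(11, 8) = 16 SOBs would be
         * reserved per group, so with QMAN_STREAMS = 4 and HL_RSVD_SOBS = 2
         * the first sync-stream SOB would be 16 * 4 * 2 = 128.
         */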

        prop->dram_base_address = DRAM_PHYS_BASE;
        prop->dram_size = GAUDI_HBM_SIZE_32GB;
        prop->dram_end_address = prop->dram_base_address +
                                        prop->dram_size;
        prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

        prop->sram_base_address = SRAM_BASE_ADDR;
        prop->sram_size = SRAM_SIZE;
        prop->sram_end_address = prop->sram_base_address +
                                        prop->sram_size;
        prop->sram_user_base_address = prop->sram_base_address +
                                        SRAM_USER_BASE_OFFSET;

        prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
        if (hdev->pldm)
                prop->mmu_pgt_size = 0x800000; /* 8MB */
        else
                prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
        prop->mmu_pte_size = HL_PTE_SIZE;
        prop->mmu_hop_table_size = HOP_TABLE_SIZE;
        prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
        prop->dram_page_size = PAGE_SIZE_2MB;
        prop->dram_supports_virtual_memory = false;

        prop->pmmu.hop0_shift = HOP0_SHIFT;
        prop->pmmu.hop1_shift = HOP1_SHIFT;
        prop->pmmu.hop2_shift = HOP2_SHIFT;
        prop->pmmu.hop3_shift = HOP3_SHIFT;
        prop->pmmu.hop4_shift = HOP4_SHIFT;
        prop->pmmu.hop0_mask = HOP0_MASK;
        prop->pmmu.hop1_mask = HOP1_MASK;
        prop->pmmu.hop2_mask = HOP2_MASK;
        prop->pmmu.hop3_mask = HOP3_MASK;
        prop->pmmu.hop4_mask = HOP4_MASK;
        prop->pmmu.start_addr = VA_HOST_SPACE_START;
        prop->pmmu.end_addr =
                        (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
        prop->pmmu.page_size = PAGE_SIZE_4KB;
        prop->pmmu.num_hops = MMU_ARCH_5_HOPS;

        /* PMMU and HPMMU are the same except for the page size */
        memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
        prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

        /* shifts and masks are the same in PMMU and DMMU */
        memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
        prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
        prop->dmmu.end_addr = VA_HOST_SPACE_END;
        prop->dmmu.page_size = PAGE_SIZE_2MB;

        prop->cfg_size = CFG_SIZE;
        prop->max_asid = MAX_ASID;
        prop->num_of_events = GAUDI_EVENT_SIZE;
        prop->tpc_enabled_mask = TPC_ENABLED_MASK;

        set_default_power_values(hdev);

        prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
        prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

        prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
        prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

        strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
                                        CARD_NAME_MAX_LEN);

        prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

        prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
                        prop->sync_stream_first_sob +
                        (num_sync_stream_queues * HL_RSVD_SOBS);
        prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
                        prop->sync_stream_first_mon +
                        (num_sync_stream_queues * HL_RSVD_MONS);

        prop->first_available_user_msix_interrupt = USHRT_MAX;

        for (i = 0 ; i < HL_MAX_DCORES ; i++)
                prop->first_available_cq[i] = USHRT_MAX;

        prop->fw_cpu_boot_dev_sts0_valid = false;
        prop->fw_cpu_boot_dev_sts1_valid = false;
        prop->hard_reset_done_by_fw = false;
        prop->gic_interrupts_enable = true;

        return 0;
}

static int gaudi_pci_bars_map(struct hl_device *hdev)
{
        static const char * const name[] = {"SRAM", "CFG", "HBM"};
        bool is_wc[3] = {false, false, true};
        int rc;

        rc = hl_pci_bars_map(hdev, name, is_wc);
        if (rc)
                return rc;

        hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
                        (CFG_BASE - SPI_FLASH_BASE_ADDR);

        return 0;
}

static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
        struct gaudi_device *gaudi = hdev->asic_specific;
        struct hl_inbound_pci_region pci_region;
        u64 old_addr = addr;
        int rc;

        if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
                return old_addr;

        if (hdev->asic_prop.iatu_done_by_fw)
                return U64_MAX;

        /* Inbound Region 2 - Bar 4 - Point to HBM */
        pci_region.mode = PCI_BAR_MATCH_MODE;
        pci_region.bar = HBM_BAR_ID;
        pci_region.addr = addr;
        rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
        if (rc)
                return U64_MAX;

        if (gaudi) {
                old_addr = gaudi->hbm_bar_cur_addr;
                gaudi->hbm_bar_cur_addr = addr;
        }

        return old_addr;
}
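
/*
 * Typical (illustrative) usage of the helper above: a caller that needs a
 * temporary HBM window saves the returned address and restores it when done.
 * bar_base and old below are placeholders, not driver symbols:
 *
 *     old = gaudi_set_hbm_bar_base(hdev, bar_base);
 *     ... access HBM through the BAR ...
 *     gaudi_set_hbm_bar_base(hdev, old);
 *
 * Returning the previous window (or U64_MAX on failure) is what makes this
 * save/restore pattern possible.
 */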

static int gaudi_init_iatu(struct hl_device *hdev)
{
        struct hl_inbound_pci_region inbound_region;
        struct hl_outbound_pci_region outbound_region;
        int rc;

        if (hdev->asic_prop.iatu_done_by_fw)
                return 0;

        /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
        inbound_region.mode = PCI_BAR_MATCH_MODE;
        inbound_region.bar = SRAM_BAR_ID;
        inbound_region.addr = SRAM_BASE_ADDR;
        rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
        if (rc)
                goto done;

        /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
        inbound_region.mode = PCI_BAR_MATCH_MODE;
        inbound_region.bar = CFG_BAR_ID;
        inbound_region.addr = SPI_FLASH_BASE_ADDR;
        rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
        if (rc)
                goto done;

        /* Inbound Region 2 - Bar 4 - Point to HBM */
        inbound_region.mode = PCI_BAR_MATCH_MODE;
        inbound_region.bar = HBM_BAR_ID;
        inbound_region.addr = DRAM_PHYS_BASE;
        rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
        if (rc)
                goto done;

        hdev->asic_funcs->set_dma_mask_from_fw(hdev);

        /* Outbound Region 0 - Point to Host */
        outbound_region.addr = HOST_PHYS_BASE;
        outbound_region.size = HOST_PHYS_SIZE;
        rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
        return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
        return RREG32(mmHW_STATE);
}

static int gaudi_early_init(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct pci_dev *pdev = hdev->pdev;
        u32 fw_boot_status;
        int rc;

        rc = gaudi_set_fixed_properties(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed setting fixed properties\n");
                return rc;
        }

        /* Check BAR sizes */
        if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
                dev_err(hdev->dev,
                        "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
                        SRAM_BAR_ID,
                        (unsigned long long) pci_resource_len(pdev,
                                                        SRAM_BAR_ID),
                        SRAM_BAR_SIZE);
                rc = -ENODEV;
                goto free_queue_props;
        }

        if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
                dev_err(hdev->dev,
                        "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
                        CFG_BAR_ID,
                        (unsigned long long) pci_resource_len(pdev,
                                                                CFG_BAR_ID),
                        CFG_BAR_SIZE);
                rc = -ENODEV;
                goto free_queue_props;
        }

        prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);

        /* If FW security is enabled at this point it means no access to ELBI */
        if (hdev->asic_prop.fw_security_enabled) {
                hdev->asic_prop.iatu_done_by_fw = true;

                /*
                 * GIC-security-bit can ONLY be set by CPUCP, so at this stage
                 * the decision can only be taken based on PCI ID security.
                 */
                hdev->asic_prop.gic_interrupts_enable = false;
                goto pci_init;
        }

        rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
                                &fw_boot_status);
        if (rc)
                goto free_queue_props;

        /* Check whether FW is configuring iATU */
        if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
                        (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
                hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
        rc = hl_pci_init(hdev);
        if (rc)
                goto free_queue_props;

        /* Before continuing in the initialization, we need to read the preboot
         * version to determine whether we run with a security-enabled firmware
         */
        rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
                                        mmCPU_BOOT_DEV_STS0,
                                        mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
                                        mmCPU_BOOT_ERR1,
                                        GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
        if (rc) {
                if (hdev->reset_on_preboot_fail)
                        hdev->asic_funcs->hw_fini(hdev, true);
                goto pci_fini;
        }

        if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
                dev_info(hdev->dev,
                        "H/W state is dirty, must reset before initializing\n");
                hdev->asic_funcs->hw_fini(hdev, true);
        }

        return 0;

pci_fini:
        hl_pci_fini(hdev);
free_queue_props:
        kfree(hdev->asic_prop.hw_queues_props);
        return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
        kfree(hdev->asic_prop.hw_queues_props);
        hl_pci_fini(hdev);

        return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
        u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
        int rc;

        if (hdev->asic_prop.fw_security_enabled) {
                rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);

                if (rc)
                        return rc;

                freq = pll_freq_arr[2];
        } else {
                /* Backward compatibility */
                div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
                div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
                nr = RREG32(mmPSOC_CPU_PLL_NR);
                nf = RREG32(mmPSOC_CPU_PLL_NF);
                od = RREG32(mmPSOC_CPU_PLL_OD);

                if (div_sel == DIV_SEL_REF_CLK ||
                                div_sel == DIV_SEL_DIVIDED_REF) {
                        if (div_sel == DIV_SEL_REF_CLK)
                                freq = PLL_REF_CLK;
                        else
                                freq = PLL_REF_CLK / (div_fctr + 1);
                } else if (div_sel == DIV_SEL_PLL_CLK ||
                        div_sel == DIV_SEL_DIVIDED_PLL) {
                        pll_clk = PLL_REF_CLK * (nf + 1) /
                                        ((nr + 1) * (od + 1));
                        if (div_sel == DIV_SEL_PLL_CLK)
                                freq = pll_clk;
                        else
                                freq = pll_clk / (div_fctr + 1);
                } else {
                        dev_warn(hdev->dev,
                                "Received invalid div select value: %d",
                                div_sel);
                        freq = 0;
                }
        }

        prop->psoc_timestamp_frequency = freq;
        prop->psoc_pci_pll_nr = nr;
        prop->psoc_pci_pll_nf = nf;
        prop->psoc_pci_pll_od = od;
        prop->psoc_pci_pll_div_factor = div_fctr;

        return 0;
}
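
/*
 * Worked example for the PLL formula used above (numbers are illustrative
 * assumptions, including a 50 MHz reference clock): with nr = 0, nf = 39,
 * od = 1 and div_sel == DIV_SEL_DIVIDED_PLL with div_fctr = 3,
 *
 *     pll_clk = 50 * (39 + 1) / ((0 + 1) * (1 + 1)) = 1000 MHz
 *     freq    = 1000 / (3 + 1)                      = 250 MHz
 */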

static int _gaudi_init_tpc_mem(struct hl_device *hdev,
                dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct packet_lin_dma *init_tpc_mem_pkt;
        struct hl_cs_job *job;
        struct hl_cb *cb;
        u64 dst_addr;
        u32 cb_size, ctl;
        u8 tpc_id;
        int rc;

        cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
        if (!cb)
                return -EFAULT;

        init_tpc_mem_pkt = cb->kernel_address;
        cb_size = sizeof(*init_tpc_mem_pkt);
        memset(init_tpc_mem_pkt, 0, cb_size);

        init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

        ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
        ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
        ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
        ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

        init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

        init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
        dst_addr = (prop->sram_user_base_address &
                        GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
                        GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
        init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

        job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
        if (!job) {
                dev_err(hdev->dev, "Failed to allocate a new job\n");
                rc = -ENOMEM;
                goto release_cb;
        }

        job->id = 0;
        job->user_cb = cb;
        atomic_inc(&job->user_cb->cs_cnt);
        job->user_cb_size = cb_size;
        job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
        job->patched_cb = job->user_cb;
        job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

        hl_debugfs_add_job(hdev, job);

        rc = gaudi_send_job_on_qman0(hdev, job);

        if (rc)
                goto free_job;

        for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
                rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
                if (rc)
                        break;
        }

free_job:
        hl_userptr_delete_list(hdev, &job->userptr_list);
        hl_debugfs_remove_job(hdev, job);
        kfree(job);
        atomic_dec(&cb->cs_cnt);

release_cb:
        hl_cb_put(cb);
        hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

        return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
        const struct firmware *fw;
        size_t fw_size;
        void *cpu_addr;
        dma_addr_t dma_handle;
        int rc, count = 5;

again:
        rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
        if (rc == -EINTR && count-- > 0) {
                msleep(50);
                goto again;
        }

        if (rc) {
                dev_err(hdev->dev, "Failed to load firmware file %s\n",
                                GAUDI_TPC_FW_FILE);
                goto out;
        }

        fw_size = fw->size;
        cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
                        &dma_handle, GFP_KERNEL | __GFP_ZERO);
        if (!cpu_addr) {
                dev_err(hdev->dev,
                        "Failed to allocate %zu of dma memory for TPC kernel\n",
                        fw_size);
                rc = -ENOMEM;
                goto out;
        }

        memcpy(cpu_addr, fw->data, fw_size);

        rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

        hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
                        dma_handle);

out:
        release_firmware(fw);
        return rc;
}

static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
        struct gaudi_device *gaudi = hdev->asic_specific;
        struct gaudi_collective_properties *prop = &gaudi->collective_props;
        struct hl_hw_queue *q;
        u32 i, sob_id, sob_group_id, queue_id;

        /* Iterate through SOB groups and assign a SOB for each slave queue */
        sob_group_id =
                stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
        sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

        queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
        for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
                q = &hdev->kernel_queues[queue_id + (4 * i)];
                q->sync_stream_prop.collective_sob_id = sob_id + i;
        }

        /* Both DMA5 and TPC7 use the same resources since only a single
         * engine needs to participate in the reduction process
         */
        queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
        q = &hdev->kernel_queues[queue_id];
        q->sync_stream_prop.collective_sob_id =
                        sob_id + NIC_NUMBER_OF_ENGINES;

        queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
        q = &hdev->kernel_queues[queue_id];
        q->sync_stream_prop.collective_sob_id =
                        sob_id + NIC_NUMBER_OF_ENGINES;
}
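
/*
 * Illustrative example of the mapping above (constant values are assumed,
 * not taken from the headers): with HL_RSVD_SOBS = 2, stream 1 using
 * curr_sob_group_idx = 0 selects sob_group_id = 1 * 2 + 0 = 2; NIC engine i
 * on that stream gets collective_sob_id = base_sob_id + i, while DMA5 and
 * TPC7 share base_sob_id + NIC_NUMBER_OF_ENGINES.
 */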

static void gaudi_sob_group_hw_reset(struct kref *ref)
{
        struct gaudi_hw_sob_group *hw_sob_group =
                container_of(ref, struct gaudi_hw_sob_group, kref);
        struct hl_device *hdev = hw_sob_group->hdev;
        u64 base_addr;
        int rc;

        base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
                        hw_sob_group->base_sob_id * 4;
        rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
                        base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
        if (rc)
                dev_err(hdev->dev,
                        "failed resetting sob group - sob base %u, count %u",
                        hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);

        kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
        struct gaudi_hw_sob_group *hw_sob_group =
                container_of(ref, struct gaudi_hw_sob_group, kref);
        struct hl_device *hdev = hw_sob_group->hdev;

        dev_crit(hdev->dev,
                "SOB release shouldn't be called here, base_sob_id: %d\n",
                hw_sob_group->base_sob_id);
}

static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
        struct gaudi_collective_properties *prop;
        int i;

        prop = &gaudi->collective_props;

        memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

        for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
                if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
                        prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
                                        BIT(i % HL_MAX_SOBS_PER_MONITOR);
        /* Set collective engine bit */
        prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
                                BIT(i % HL_MAX_SOBS_PER_MONITOR);
}
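
/*
 * Example of the resulting mask layout (assuming HL_MAX_SOBS_PER_MONITOR is
 * 8 and all 10 NIC engines are available): mstr_sob_mask[0] = 0xff covers
 * NICs 0-7 and mstr_sob_mask[1] = 0x07 covers NICs 8-9 plus the collective
 * (reduction) engine bit that is set after the loop, where i == 10.
 */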

static int gaudi_collective_init(struct hl_device *hdev)
{
        u32 i, sob_id, reserved_sobs_per_group;
        struct gaudi_collective_properties *prop;
        struct gaudi_device *gaudi;

        gaudi = hdev->asic_specific;
        prop = &gaudi->collective_props;
        sob_id = hdev->asic_prop.collective_first_sob;

        /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
        reserved_sobs_per_group =
                ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

        /* Init SOB groups */
        for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
                prop->hw_sob_group[i].hdev = hdev;
                prop->hw_sob_group[i].base_sob_id = sob_id;
                sob_id += reserved_sobs_per_group;
                gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
        }

        for (i = 0 ; i < QMAN_STREAMS; i++) {
                prop->next_sob_group_val[i] = 1;
                prop->curr_sob_group_idx[i] = 0;
                gaudi_collective_map_sobs(hdev, i);
        }

        gaudi_collective_mstr_sob_mask_set(gaudi);

        return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
        struct gaudi_device *gaudi = hdev->asic_specific;
        struct gaudi_collective_properties *cprop = &gaudi->collective_props;

        kref_put(&cprop->hw_sob_group[sob_group].kref,
                                        gaudi_sob_group_hw_reset);
}

static void gaudi_collective_master_init_job(struct hl_device *hdev,
                struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
        u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
        struct gaudi_collective_properties *cprop;
        struct hl_gen_wait_properties wait_prop;
        struct hl_sync_stream_properties *prop;
        struct gaudi_device *gaudi;

        gaudi = hdev->asic_specific;
        cprop = &gaudi->collective_props;
        queue_id = job->hw_queue_id;
        prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

        master_sob_base =
                cprop->hw_sob_group[sob_group_offset].base_sob_id;
        master_monitor = prop->collective_mstr_mon_id[0];

        cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

        dev_dbg(hdev->dev,
                "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
                master_sob_base, cprop->mstr_sob_mask[0],
                cprop->next_sob_group_val[stream],
                master_monitor, queue_id);

        wait_prop.data = (void *) job->patched_cb;
        wait_prop.sob_base = master_sob_base;
        wait_prop.sob_mask = cprop->mstr_sob_mask[0];
        wait_prop.sob_val = cprop->next_sob_group_val[stream];
        wait_prop.mon_id = master_monitor;
        wait_prop.q_idx = queue_id;
        wait_prop.size = cb_size;
        cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

        master_sob_base += HL_MAX_SOBS_PER_MONITOR;
        master_monitor = prop->collective_mstr_mon_id[1];

        dev_dbg(hdev->dev,
                "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
                master_sob_base, cprop->mstr_sob_mask[1],
                cprop->next_sob_group_val[stream],
                master_monitor, queue_id);

        wait_prop.sob_base = master_sob_base;
        wait_prop.sob_mask = cprop->mstr_sob_mask[1];
        wait_prop.mon_id = master_monitor;
        wait_prop.size = cb_size;
        cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

static void gaudi_collective_slave_init_job(struct hl_device *hdev,
                struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
        struct hl_gen_wait_properties wait_prop;
        struct hl_sync_stream_properties *prop;
        u32 queue_id, cb_size = 0;

        queue_id = job->hw_queue_id;
        prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

        /* Add to wait CBs using slave monitor */
        wait_prop.data = (void *) job->user_cb;
        wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
        wait_prop.sob_mask = 0x1;
        wait_prop.sob_val = cs_cmpl->sob_val;
        wait_prop.mon_id = prop->collective_slave_mon_id;
        wait_prop.q_idx = queue_id;
        wait_prop.size = cb_size;

        dev_dbg(hdev->dev,
                "Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
                cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
                prop->collective_slave_mon_id, queue_id);

        cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

        dev_dbg(hdev->dev,
                "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
                prop->collective_sob_id, queue_id);

        cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
                        prop->collective_sob_id, cb_size, false);
}

static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
        struct hl_cs_compl *signal_cs_cmpl =
                container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
        struct hl_cs_compl *cs_cmpl =
                container_of(cs->fence, struct hl_cs_compl, base_fence);
        struct gaudi_collective_properties *cprop;
        u32 stream, queue_id, sob_group_offset;
        struct gaudi_device *gaudi;
        struct hl_device *hdev;
        struct hl_cs_job *job;
        struct hl_ctx *ctx;

        ctx = cs->ctx;
        hdev = ctx->hdev;
        gaudi = hdev->asic_specific;
        cprop = &gaudi->collective_props;

        /* copy the SOB id and value of the signal CS */
        cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
        cs_cmpl->sob_val = signal_cs_cmpl->sob_val;

        /* Calculate the stream from collective master queue (1st job) */
        job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
        stream = job->hw_queue_id % 4;
        sob_group_offset =
                stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

        list_for_each_entry(job, &cs->job_list, cs_node) {
                queue_id = job->hw_queue_id;

                if (hdev->kernel_queues[queue_id].collective_mode ==
                                HL_COLLECTIVE_MASTER)
                        gaudi_collective_master_init_job(hdev, job, stream,
                                                sob_group_offset);
                else
                        gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
        }

        cs_cmpl->sob_group = sob_group_offset;

        /* Handle sob group kref and wraparound */
        kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
        cprop->next_sob_group_val[stream]++;

        if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
                /*
                 * Decrement as we reached the max value.
                 * The release function won't be called here as we've
                 * just incremented the refcount.
                 */
                kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
                                gaudi_sob_group_reset_error);
                cprop->next_sob_group_val[stream] = 1;
                /* only two SOBs are currently in use */
                cprop->curr_sob_group_idx[stream] =
                        (cprop->curr_sob_group_idx[stream] + 1) &
                                                        (HL_RSVD_SOBS - 1);

                gaudi_collective_map_sobs(hdev, stream);

                dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
                                cprop->curr_sob_group_idx[stream], stream);
        }

        /* Increment kref since all slave queues are now waiting on it */
        kref_get(&cs_cmpl->hw_sob->kref);
        /*
         * Must put the signal fence after the SOB refcnt increment so
         * the SOB refcnt won't turn 0 and reset the SOB before the
         * wait CS was submitted.
         */
        mb();
        hl_fence_put(cs->signal_fence);
        cs->signal_fence = NULL;
}
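
/*
 * Note on the wraparound handling above: next_sob_group_val counts signal
 * values per stream; when it reaches HL_MAX_SOB_VAL the group is recycled by
 * dropping the kref that was just taken (the error-release callback is not
 * expected to run), the counter restarts at 1 and the stream rotates to the
 * next reserved SOB group, re-mapping the slave queues to the new SOB ids.
 */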

static int gaudi_collective_wait_create_job(struct hl_device *hdev,
                struct hl_ctx *ctx, struct hl_cs *cs,
                enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
{
        struct hw_queue_properties *hw_queue_prop;
        struct hl_cs_counters_atomic *cntr;
        struct hl_cs_job *job;
        struct hl_cb *cb;
        u32 cb_size;
        bool patched_cb;

        cntr = &hdev->aggregated_cs_counters;

        if (mode == HL_COLLECTIVE_MASTER) {
                /* CB size of collective master queue contains
                 * 4 msg short packets for monitor 1 configuration
                 * 1 fence packet
                 * 4 msg short packets for monitor 2 configuration
                 * 1 fence packet
                 * 2 msg prot packets for completion and MSI-X
                 */
                cb_size = sizeof(struct packet_msg_short) * 8 +
                                sizeof(struct packet_fence) * 2 +
                                sizeof(struct packet_msg_prot) * 2;
                patched_cb = true;
        } else {
                /* CB size of collective slave queues contains
                 * 4 msg short packets for monitor configuration
                 * 1 fence packet
                 * 1 additional msg short packet for sob signal
                 */
                cb_size = sizeof(struct packet_msg_short) * 5 +
                                sizeof(struct packet_fence);
                patched_cb = false;
        }

        hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
        job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
        if (!job) {
                atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
                atomic64_inc(&cntr->out_of_mem_drop_cnt);
                dev_err(hdev->dev, "Failed to allocate a new job\n");
                return -ENOMEM;
        }

        /* Allocate internal mapped CB for non patched CBs */
        cb = hl_cb_kernel_create(hdev, cb_size,
                        hdev->mmu_enable && !patched_cb);
        if (!cb) {
                atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
                atomic64_inc(&cntr->out_of_mem_drop_cnt);
                kfree(job);
                return -EFAULT;
        }

        job->id = 0;
        job->cs = cs;
        job->user_cb = cb;
        atomic_inc(&job->user_cb->cs_cnt);
        job->user_cb_size = cb_size;
        job->hw_queue_id = queue_id;

        /*
         * No need in parsing, user CB is the patched CB.
         * We call hl_cb_destroy() for two reasons - we don't need
         * the CB in the CB idr anymore and to decrement its refcount as
         * it was incremented inside hl_cb_kernel_create().
         */
        if (patched_cb)
                job->patched_cb = job->user_cb;
        else
                job->patched_cb = NULL;

        job->job_cb_size = job->user_cb_size;
        hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

        /* increment refcount as for external queues we get completion */
        if (hw_queue_prop->type == QUEUE_TYPE_EXT)
                cs_get(cs);

        cs->jobs_in_queue_cnt[job->hw_queue_id]++;

        list_add_tail(&job->cs_node, &cs->job_list);

        hl_debugfs_add_job(hdev, job);

        return 0;
}
1315
1316 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1317                 struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
1318                 u32 collective_engine_id)
1319 {
1320         struct gaudi_device *gaudi = hdev->asic_specific;
1321         struct hw_queue_properties *hw_queue_prop;
1322         u32 queue_id, collective_queue, num_jobs;
1323         u32 stream, nic_queue, nic_idx = 0;
1324         bool skip;
1325         int i, rc = 0;
1326
1327         /* Verify wait queue id is configured as master */
1328         hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1329         if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1330                 dev_err(hdev->dev,
1331                         "Queue %d is not configured as collective master\n",
1332                         wait_queue_id);
1333                 return -EINVAL;
1334         }
1335
1336         /* Verify engine id is supported */
1337         if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1338                         collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1339                 dev_err(hdev->dev,
1340                         "Collective wait does not support engine %u\n",
1341                         collective_engine_id);
1342                 return -EINVAL;
1343         }
1344
1345         stream = wait_queue_id % 4;
1346
1347         if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1348                 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1349         else
1350                 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1351
1352         num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1353         nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1354
1355         /* The first job goes to the collective master queue and waits for
1356          * the collective slave queues to finish execution.
1357          * The synchronization is done using two monitors:
1358          * the first monitor for NICs 0-7, the second for NICs 8-9 and the
1359          * reduction engine (DMA5/TPC7).
1360          *
1361          * The rest of the jobs go to the collective slave queues, which
1362          * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1363          */
1364         for (i = 0 ; i < num_jobs ; i++) {
1365                 if (i == 0) {
1366                         queue_id = wait_queue_id;
1367                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1368                                 HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
1369                 } else {
1370                         if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1371                                 if (gaudi->hw_cap_initialized &
1372                                         BIT(HW_CAP_NIC_SHIFT + nic_idx))
1373                                         skip = false;
1374                                 else
1375                                         skip = true;
1376
1377                                 queue_id = nic_queue;
1378                                 nic_queue += 4;
1379                                 nic_idx++;
1380
1381                                 if (skip)
1382                                         continue;
1383                         } else {
1384                                 queue_id = collective_queue;
1385                         }
1386
1387                         rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1388                                 HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
1389                 }
1390
1391                 if (rc)
1392                         return rc;
1393         }
1394
1395         return rc;
1396 }
1397
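/*
 * Late initialization - runs after the device CPU is up: read the cpucp info,
 * disable the unused NIC QMANs on PCI cards, enable PCI access from the
 * device CPU, fetch the PSOC frequency, clear the MMU page tables range and
 * initialize the TPC memories and the collective queues.
 */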
1398 static int gaudi_late_init(struct hl_device *hdev)
1399 {
1400         struct gaudi_device *gaudi = hdev->asic_specific;
1401         int rc;
1402
1403         rc = gaudi->cpucp_info_get(hdev);
1404         if (rc) {
1405                 dev_err(hdev->dev, "Failed to get cpucp info\n");
1406                 return rc;
1407         }
1408
1409         if ((hdev->card_type == cpucp_card_type_pci) &&
1410                         (hdev->nic_ports_mask & 0x3)) {
1411                 dev_info(hdev->dev,
1412                         "PCI card detected, only 8 ports are enabled\n");
1413                 hdev->nic_ports_mask &= ~0x3;
1414
1415                 /* Stop and disable unused NIC QMANs */
1416                 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1417                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1418                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1419
1420                 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1421                                         NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1422                                         NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1423
1424                 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1425                 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1426
1427                 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1428         }
1429
1430         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
1431         if (rc) {
1432                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1433                 return rc;
1434         }
1435
1436         rc = gaudi_fetch_psoc_frequency(hdev);
1437         if (rc) {
1438                 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1439                 goto disable_pci_access;
1440         }
1441
1442         rc = gaudi_mmu_clear_pgt_range(hdev);
1443         if (rc) {
1444                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1445                 goto disable_pci_access;
1446         }
1447
1448         rc = gaudi_init_tpc_mem(hdev);
1449         if (rc) {
1450                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1451                 goto disable_pci_access;
1452         }
1453
1454         rc = gaudi_collective_init(hdev);
1455         if (rc) {
1456                 dev_err(hdev->dev, "Failed to init collective\n");
1457                 goto disable_pci_access;
1458         }
1459
1460         return 0;
1461
1462 disable_pci_access:
1463         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1464
1465         return rc;
1466 }
1467
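/* Free the hwmon channel info array and its per-channel config buffers */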
1468 static void gaudi_late_fini(struct hl_device *hdev)
1469 {
1470         const struct hwmon_channel_info **channel_info_arr;
1471         int i = 0;
1472
1473         if (!hdev->hl_chip_info->info)
1474                 return;
1475
1476         channel_info_arr = hdev->hl_chip_info->info;
1477
1478         while (channel_info_arr[i]) {
1479                 kfree(channel_info_arr[i]->config);
1480                 kfree(channel_info_arr[i]);
1481                 i++;
1482         }
1483
1484         kfree(channel_info_arr);
1485
1486         hdev->hl_chip_info->info = NULL;
1487 }
1488
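/*
 * Allocate the memory region that the device CPU can access over PCI.
 * Retry the allocation until the whole region shares the same PCI MSB bits
 * (see the comment below regarding the 50-bit address extension).
 */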
1489 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1490 {
1491         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1492         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1493         int i, j, rc = 0;
1494
1495         /*
1496          * The device CPU works with 40-bit addresses, and bit 39 must be set
1497          * to '1' when accessing the host.
1498          * Bits 49:39 of the full host address are saved for a later
1499          * configuration of the HW that extends the address to 50 bits.
1500          * Because a single HW register holds the extension bits, these bits
1501          * must be identical across the entire allocated range.
1502          */
1503
1504         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1505                 virt_addr_arr[i] =
1506                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1507                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1508                                                 &dma_addr_arr[i],
1509                                                 GFP_KERNEL | __GFP_ZERO);
1510                 if (!virt_addr_arr[i]) {
1511                         rc = -ENOMEM;
1512                         goto free_dma_mem_arr;
1513                 }
1514
1515                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1516                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1517                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1518                         break;
1519         }
1520
1521         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1522                 dev_err(hdev->dev,
1523                         "MSB of CPU accessible DMA memory is not identical across the allocated range\n");
1524                 rc = -EFAULT;
1525                 goto free_dma_mem_arr;
1526         }
1527
1528         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1529         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1530         hdev->cpu_pci_msb_addr =
1531                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1532
1533         if (!hdev->asic_prop.fw_security_enabled)
1534                 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1535
1536 free_dma_mem_arr:
1537         for (j = 0 ; j < i ; j++)
1538                 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1539                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
1540                                                 virt_addr_arr[j],
1541                                                 dma_addr_arr[j]);
1542
1543         return rc;
1544 }
1545
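/* Free the PQ buffers that were allocated for the internal QMANs */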
1546 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1547 {
1548         struct gaudi_device *gaudi = hdev->asic_specific;
1549         struct gaudi_internal_qman_info *q;
1550         u32 i;
1551
1552         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1553                 q = &gaudi->internal_qmans[i];
1554                 if (!q->pq_kernel_addr)
1555                         continue;
1556                 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1557                                                         q->pq_kernel_addr,
1558                                                         q->pq_dma_addr);
1559         }
1560 }
1561
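/*
 * Allocate a coherent PQ buffer for every internal (on-device) queue.
 * The PQ size depends on the engine type (DMA/MME/TPC/NIC).
 */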
1562 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1563 {
1564         struct gaudi_device *gaudi = hdev->asic_specific;
1565         struct gaudi_internal_qman_info *q;
1566         int rc, i;
1567
1568         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1569                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1570                         continue;
1571
1572                 q = &gaudi->internal_qmans[i];
1573
1574                 switch (i) {
1575                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1576                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1577                         break;
1578                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1579                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1580                         break;
1581                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1582                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1583                         break;
1584                 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1585                         q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1586                         break;
1587                 default:
1588                         dev_err(hdev->dev, "Bad internal queue index %d\n", i);
1589                         rc = -EINVAL;
1590                         goto free_internal_qmans_pq_mem;
1591                 }
1592
1593                 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1594                                                 hdev, q->pq_size,
1595                                                 &q->pq_dma_addr,
1596                                                 GFP_KERNEL | __GFP_ZERO);
1597                 if (!q->pq_kernel_addr) {
1598                         rc = -ENOMEM;
1599                         goto free_internal_qmans_pq_mem;
1600                 }
1601         }
1602
1603         return 0;
1604
1605 free_internal_qmans_pq_mem:
1606         gaudi_free_internal_qmans_pq_mem(hdev);
1607         return rc;
1608 }
1609
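/*
 * Describe the device memory regions (CFG, SRAM, DRAM, SP SRAM) that are
 * exposed through the PCI BARs, including each region's offset inside its BAR.
 */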
1610 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1611 {
1612         struct asic_fixed_properties *prop = &hdev->asic_prop;
1613         struct pci_mem_region *region;
1614
1615         /* CFG */
1616         region = &hdev->pci_mem_region[PCI_REGION_CFG];
1617         region->region_base = CFG_BASE;
1618         region->region_size = CFG_SIZE;
1619         region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1620         region->bar_size = CFG_BAR_SIZE;
1621         region->bar_id = CFG_BAR_ID;
1622         region->used = 1;
1623
1624         /* SRAM */
1625         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1626         region->region_base = SRAM_BASE_ADDR;
1627         region->region_size = SRAM_SIZE;
1628         region->offset_in_bar = 0;
1629         region->bar_size = SRAM_BAR_SIZE;
1630         region->bar_id = SRAM_BAR_ID;
1631         region->used = 1;
1632
1633         /* DRAM */
1634         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1635         region->region_base = DRAM_PHYS_BASE;
1636         region->region_size = hdev->asic_prop.dram_size;
1637         region->offset_in_bar = 0;
1638         region->bar_size = prop->dram_pci_bar_size;
1639         region->bar_id = HBM_BAR_ID;
1640         region->used = 1;
1641
1642         /* SP SRAM */
1643         region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1644         region->region_base = PSOC_SCRATCHPAD_ADDR;
1645         region->region_size = PSOC_SCRATCHPAD_SIZE;
1646         region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1647         region->bar_size = CFG_BAR_SIZE;
1648         region->bar_id = CFG_BAR_ID;
1649         region->used = 1;
1650 }
1651
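/*
 * Software-only initialization: build the event map from the IRQ map table,
 * create the DMA pools, allocate the CPU accessible memory and the internal
 * QMAN PQs and set up the PCI memory regions.
 */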
1652 static int gaudi_sw_init(struct hl_device *hdev)
1653 {
1654         struct gaudi_device *gaudi;
1655         u32 i, event_id = 0;
1656         int rc;
1657
1658         /* Allocate device structure */
1659         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1660         if (!gaudi)
1661                 return -ENOMEM;
1662
1663         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1664                 if (gaudi_irq_map_table[i].valid) {
1665                         if (event_id == GAUDI_EVENT_SIZE) {
1666                                 dev_err(hdev->dev,
1667                                         "Event array exceeds the limit of %u events\n",
1668                                         GAUDI_EVENT_SIZE);
1669                                 rc = -EINVAL;
1670                                 goto free_gaudi_device;
1671                         }
1672
1673                         gaudi->events[event_id++] =
1674                                         gaudi_irq_map_table[i].fc_id;
1675                 }
1676         }
1677
1678         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1679
1680         gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1681
1682         hdev->asic_specific = gaudi;
1683
1684         /* Create DMA pool for small allocations */
1685         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1686                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1687         if (!hdev->dma_pool) {
1688                 dev_err(hdev->dev, "failed to create DMA pool\n");
1689                 rc = -ENOMEM;
1690                 goto free_gaudi_device;
1691         }
1692
1693         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1694         if (rc)
1695                 goto free_dma_pool;
1696
1697         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1698         if (!hdev->cpu_accessible_dma_pool) {
1699                 dev_err(hdev->dev,
1700                         "Failed to create CPU accessible DMA pool\n");
1701                 rc = -ENOMEM;
1702                 goto free_cpu_dma_mem;
1703         }
1704
1705         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1706                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1707                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1708         if (rc) {
1709                 dev_err(hdev->dev,
1710                         "Failed to add memory to CPU accessible DMA pool\n");
1711                 rc = -EFAULT;
1712                 goto free_cpu_accessible_dma_pool;
1713         }
1714
1715         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1716         if (rc)
1717                 goto free_cpu_accessible_dma_pool;
1718
1719         spin_lock_init(&gaudi->hw_queues_lock);
1720         mutex_init(&gaudi->clk_gate_mutex);
1721
1722         hdev->supports_sync_stream = true;
1723         hdev->supports_coresight = true;
1724         hdev->supports_staged_submission = true;
1725
1726         gaudi_set_pci_memory_regions(hdev);
1727
1728         return 0;
1729
1730 free_cpu_accessible_dma_pool:
1731         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1732 free_cpu_dma_mem:
1733         if (!hdev->asic_prop.fw_security_enabled)
1734                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1735                                         hdev->cpu_pci_msb_addr);
1736         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1737                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1738                         hdev->cpu_accessible_dma_mem,
1739                         hdev->cpu_accessible_dma_address);
1740 free_dma_pool:
1741         dma_pool_destroy(hdev->dma_pool);
1742 free_gaudi_device:
1743         kfree(gaudi);
1744         return rc;
1745 }
1746
1747 static int gaudi_sw_fini(struct hl_device *hdev)
1748 {
1749         struct gaudi_device *gaudi = hdev->asic_specific;
1750
1751         gaudi_free_internal_qmans_pq_mem(hdev);
1752
1753         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1754
1755         if (!hdev->asic_prop.fw_security_enabled)
1756                 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1757                                         hdev->cpu_pci_msb_addr);
1758
1759         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1760                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1761                         hdev->cpu_accessible_dma_mem,
1762                         hdev->cpu_accessible_dma_address);
1763
1764         dma_pool_destroy(hdev->dma_pool);
1765
1766         mutex_destroy(&gaudi->clk_gate_mutex);
1767
1768         kfree(gaudi);
1769
1770         return 0;
1771 }
1772
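/*
 * In single MSI mode one interrupt vector serves all completion queues and
 * the event queue, so the handler must check all of them on every interrupt.
 */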
1773 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1774 {
1775         struct hl_device *hdev = arg;
1776         int i;
1777
1778         if (hdev->disabled)
1779                 return IRQ_HANDLED;
1780
1781         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1782                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1783
1784         hl_irq_handler_eq(irq, &hdev->event_queue);
1785
1786         return IRQ_HANDLED;
1787 }
1788
1789 /*
1790  * For backward compatibility, new MSI interrupts should be set after the
1791  * existing CPU and NIC interrupts.
1792  */
1793 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1794                                 bool cpu_eq)
1795 {
1796         int msi_vec;
1797
1798         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1799                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1800                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1801
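        /*
         * Completion queues use vectors 0..GAUDI_EVENT_QUEUE_MSI_IDX-1 and the
         * CPU event queue uses GAUDI_EVENT_QUEUE_MSI_IDX itself. Any newer
         * index is shifted up by NIC_NUMBER_OF_ENGINES + 1 so that it lands
         * after the CPU and NIC vectors.
         */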
1802         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1803                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1804
1805         return pci_irq_vector(hdev->pdev, msi_vec);
1806 }
1807
1808 static int gaudi_enable_msi_single(struct hl_device *hdev)
1809 {
1810         int rc, irq;
1811
1812         dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
1813
1814         irq = gaudi_pci_irq_vector(hdev, 0, false);
1815         rc = request_irq(irq, gaudi_irq_handler_single, 0,
1816                         "gaudi single msi", hdev);
1817         if (rc)
1818                 dev_err(hdev->dev,
1819                         "Failed to request single MSI IRQ\n");
1820
1821         return rc;
1822 }
1823
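/*
 * In multi MSI mode every completion queue gets its own interrupt vector and
 * the CPU event queue uses the dedicated GAUDI_EVENT_QUEUE_MSI_IDX vector.
 */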
1824 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1825 {
1826         int cq_cnt = hdev->asic_prop.completion_queues_count;
1827         int rc, i, irq_cnt_init, irq;
1828
1829         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1830                 irq = gaudi_pci_irq_vector(hdev, i, false);
1831                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1832                                 &hdev->completion_queue[i]);
1833                 if (rc) {
1834                         dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1835                         goto free_irqs;
1836                 }
1837         }
1838
1839         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1840         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1841                                 &hdev->event_queue);
1842         if (rc) {
1843                 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
1844                 goto free_irqs;
1845         }
1846
1847         return 0;
1848
1849 free_irqs:
1850         for (i = 0 ; i < irq_cnt_init ; i++)
1851                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1852                                 &hdev->completion_queue[i]);
1853         return rc;
1854 }
1855
1856 static int gaudi_enable_msi(struct hl_device *hdev)
1857 {
1858         struct gaudi_device *gaudi = hdev->asic_specific;
1859         int rc;
1860
1861         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1862                 return 0;
1863
1864         rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
1865         if (rc < 0) {
1866                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1867                 return rc;
1868         }
1869
1870         if (rc < NUMBER_OF_INTERRUPTS) {
1871                 gaudi->multi_msi_mode = false;
1872                 rc = gaudi_enable_msi_single(hdev);
1873         } else {
1874                 gaudi->multi_msi_mode = true;
1875                 rc = gaudi_enable_msi_multi(hdev);
1876         }
1877
1878         if (rc)
1879                 goto free_pci_irq_vectors;
1880
1881         gaudi->hw_cap_initialized |= HW_CAP_MSI;
1882
1883         return 0;
1884
1885 free_pci_irq_vectors:
1886         pci_free_irq_vectors(hdev->pdev);
1887         return rc;
1888 }
1889
1890 static void gaudi_sync_irqs(struct hl_device *hdev)
1891 {
1892         struct gaudi_device *gaudi = hdev->asic_specific;
1893         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1894
1895         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1896                 return;
1897
1898         /* Wait for all pending IRQ handlers to finish */
1899         if (gaudi->multi_msi_mode) {
1900                 for (i = 0 ; i < cq_cnt ; i++)
1901                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1902
1903                 synchronize_irq(gaudi_pci_irq_vector(hdev,
1904                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
1905                                                 true));
1906         } else {
1907                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1908         }
1909 }
1910
1911 static void gaudi_disable_msi(struct hl_device *hdev)
1912 {
1913         struct gaudi_device *gaudi = hdev->asic_specific;
1914         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1915
1916         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1917                 return;
1918
1919         gaudi_sync_irqs(hdev);
1920
1921         if (gaudi->multi_msi_mode) {
1922                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1923                                                 true);
1924                 free_irq(irq, &hdev->event_queue);
1925
1926                 for (i = 0 ; i < cq_cnt ; i++) {
1927                         irq = gaudi_pci_irq_vector(hdev, i, false);
1928                         free_irq(irq, &hdev->completion_queue[i]);
1929                 }
1930         } else {
1931                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1932         }
1933
1934         pci_free_irq_vectors(hdev->pdev);
1935
1936         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1937 }
1938
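/*
 * Enable the SRAM address scrambler in all SIF/NIF routers and DMA
 * interfaces, unless the firmware is responsible for it, it was already
 * enabled, or scrambling is disabled in hdev->sram_scrambler_enable.
 */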
1939 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1940 {
1941         struct gaudi_device *gaudi = hdev->asic_specific;
1942
1943         if (hdev->asic_prop.fw_security_enabled)
1944                 return;
1945
1946         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
1947                                                 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
1948                 return;
1949
1950         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1951                 return;
1952
1953         if (!hdev->sram_scrambler_enable)
1954                 return;
1955
1956         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1957                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1958         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1959                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1960         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1961                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1962         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1963                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1964         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1965                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1966         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1967                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1968         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1969                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1970         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1971                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1972
1973         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1974                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1975         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1976                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1977         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1978                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1979         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1980                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1981         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1982                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1983         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1984                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1985         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1986                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1987         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1988                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1989
1990         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1991                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1992         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1993                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1994         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1995                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1996         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1997                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1998         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1999                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2000         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2001                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2002         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2003                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2004         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2005                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2006
2007         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2008 }
2009
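/* Same as gaudi_init_scrambler_sram() but for the HBM address scrambler */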
2010 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2011 {
2012         struct gaudi_device *gaudi = hdev->asic_specific;
2013
2014         if (hdev->asic_prop.fw_security_enabled)
2015                 return;
2016
2017         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2018                                         CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2019                 return;
2020
2021         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2022                 return;
2023
2024         if (!hdev->dram_scrambler_enable)
2025                 return;
2026
2027         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2028                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2029         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2030                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2031         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2032                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2033         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2034                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2035         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2036                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2037         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2038                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2039         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2040                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2041         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2042                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2043
2044         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2045                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2046         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2047                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2048         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2049                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2050         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2051                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2052         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2053                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2054         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2055                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2056         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2057                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2058         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2059                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2060
2061         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2062                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2063         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2064                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2065         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2066                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2067         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2068                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2069         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2070                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2071         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2072                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2073         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2074                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2075         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2076                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2077
2078         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2079 }
2080
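/*
 * Program the end-to-end credit sizes (HBM and PCI read/write) for all
 * routers and DMA interfaces and enable the E2E credits. Additional NL_HBM
 * values are written when DRAM scrambling is disabled. Skipped when the
 * firmware is responsible for this configuration.
 */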
2081 static void gaudi_init_e2e(struct hl_device *hdev)
2082 {
2083         if (hdev->asic_prop.fw_security_enabled)
2084                 return;
2085
2086         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2087                                         CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2088                 return;
2089
2090         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2091         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2092         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2093         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2094
2095         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2096         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2097         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2098         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2099
2100         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2101         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2102         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2103         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2104
2105         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2106         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2107         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2108         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2109
2110         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2111         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2112         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2113         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2114
2115         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2116         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2117         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2118         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2119
2120         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2121         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2122         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2123         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2124
2125         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2126         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2127         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2128         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2129
2130         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2131         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2132         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2133         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2134
2135         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2136         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2137         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2138         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2139
2140         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2141         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2142         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2143         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2144
2145         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2146         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2147         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2148         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2149
2150         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2151         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2152         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2153         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2154
2155         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2156         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2157         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2158         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2159
2160         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2161         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2162         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2163         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2164
2165         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2166         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2167         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2168         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2169
2170         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2171         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2172         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2173         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2174
2175         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2176         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2177         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2178         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2179
2180         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2181         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2182         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2183         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2184
2185         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2186         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2187         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2188         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2189
2190         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2191         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2192         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2193         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2194
2195         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2196         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2197         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2198         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2199
2200         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2201         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2202         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2203         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2204
2205         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2206         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2207         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2208         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2209
2210         if (!hdev->dram_scrambler_enable) {
2211                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2212                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2213                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2214                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2215
2216                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2217                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2218                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2219                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2220
2221                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2222                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2223                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2224                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2225
2226                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2227                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2228                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2229                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2230
2231                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2232                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2233                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2234                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2235
2236                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2237                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2238                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2239                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2240
2241                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2242                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2243                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2244                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2245
2246                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2247                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2248                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2249                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2250
2251                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
2252                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
2253                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
2254                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
2255
2256                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
2257                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
2258                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
2259                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
2260
2261                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
2262                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
2263                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
2264                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
2265
2266                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
2267                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
2268                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
2269                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
2270
2271                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
2272                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
2273                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
2274                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
2275
2276                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
2277                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
2278                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
2279                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
2280
2281                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
2282                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
2283                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
2284                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
2285
2286                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
2287                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
2288                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
2289                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
2290
2291                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2292                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2293                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2294                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2295
2296                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2297                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2298                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2299                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2300
2301                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2302                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2303                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2304                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2305
2306                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2307                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2308                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2309                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2310
2311                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2312                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2313                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2314                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2315
2316                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2317                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2318                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2319                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2320
2321                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
2322                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
2323                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
2324                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
2325
2326                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
2327                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
2328                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
2329                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
2330         }
2331
2332         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2333                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2334         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2335                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2336
2337         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2338                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2339         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2340                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2341
2342         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2343                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2344         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2345                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2346
2347         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2348                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2349         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2350                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2351
2352         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2353                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2354         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2355                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2356
2357         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2358                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2359         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2360                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2361
2362         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2363                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2364         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2365                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2366
2367         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2368                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2369         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2370                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2371
2372         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2373                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2374         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2375                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2376
2377         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2378                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2379         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2380                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2381
2382         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2383                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2384         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2385                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2386
2387         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2388                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2389         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2390                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2391
2392         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2393                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2394         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2395                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2396
2397         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2398                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2399         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2400                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2401
2402         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2403                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2404         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2405                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2406
2407         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2408                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2409         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2410                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2411
2412         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2413                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2414         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2415                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2416
2417         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2418                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2419         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2420                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2421
2422         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2423                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2424         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2425                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2426
2427         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2428                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2429         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2430                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2431
2432         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2433                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2434         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2435                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2436
2437         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2438                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2439         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2440                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2441
2442         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2443                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2444         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2445                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2446
2447         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2448                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2449         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2450                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2451 }
2452
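/* Program and enable the HBM read/write credit counts in all DMA interfaces */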
2453 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2454 {
2455         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2456
2457         if (hdev->asic_prop.fw_security_enabled)
2458                 return;
2459
2460         if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2461                                                 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2462                 return;
2463
2464         hbm0_wr = 0x33333333;
2465         hbm0_rd = 0x77777777;
2466         hbm1_wr = 0x55555555;
2467         hbm1_rd = 0xDDDDDDDD;
2468
2469         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2470         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2471         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2472         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2473
2474         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2475         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2476         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2477         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2478
2479         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2480         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2481         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2482         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2483
2484         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2485         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2486         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2487         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2488
2489         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2490                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2491                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2492         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2493                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2494                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2495         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2496                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2497                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2498         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2499                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2500                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2501
2502         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2503                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2504                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2505         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2506                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2507                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2508         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2509                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2510                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2511         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2512                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2513                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2514 }
2515
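/*
 * "Golden" register values: E2E and HBM credits, TPC interrupt masks and
 * icache configuration, SRAM clear for tensor DMA and MME rollup counters.
 */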
2516 static void gaudi_init_golden_registers(struct hl_device *hdev)
2517 {
2518         u32 tpc_offset;
2519         int tpc_id, i;
2520
2521         gaudi_init_e2e(hdev);
2522         gaudi_init_hbm_cred(hdev);
2523
2524         for (tpc_id = 0, tpc_offset = 0;
2525                                 tpc_id < TPC_NUMBER_OF_ENGINES;
2526                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2527                 /* Mask all arithmetic interrupts from TPC */
2528                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2529                 /* Set 16 cache lines */
2530                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2531                                 ICACHE_FETCH_LINE_NUM, 2);
2532         }
2533
2534         /* Make sure the first 128 bytes in SRAM are 0 for Tensor DMA */
2535         for (i = 0 ; i < 128 ; i += 8)
2536                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2537
2538         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2539         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2540         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2541         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2542 }
2543
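/*
 * Configure a single stream of a PCI DMA QMAN: PQ base address and size,
 * LDMA offsets, monitor/SOB message base addresses and, once per QMAN,
 * the error/RAZWI interrupt routing and arbitration watchdog.
 */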
2544 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2545                                         int qman_id, dma_addr_t qman_pq_addr)
2546 {
2547         struct cpu_dyn_regs *dyn_regs =
2548                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2549         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2550         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2551         u32 q_off, dma_qm_offset;
2552         u32 dma_qm_err_cfg, irq_handler_offset;
2553
2554         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2555
2556         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2557                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2558         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2559                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2560         so_base_en_lo = lower_32_bits(CFG_BASE +
2561                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2562         so_base_en_hi = upper_32_bits(CFG_BASE +
2563                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2564         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2565                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2566         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2567                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2568         so_base_ws_lo = lower_32_bits(CFG_BASE +
2569                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2570         so_base_ws_hi = upper_32_bits(CFG_BASE +
2571                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2572
2573         q_off = dma_qm_offset + qman_id * 4;
2574
2575         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2576         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2577
2578         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2579         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2580         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2581
2582         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2583         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2584                                                         QMAN_LDMA_SRC_OFFSET);
2585         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2586                                                         QMAN_LDMA_DST_OFFSET);
2587
2588         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2589         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2590         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2591         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2592         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2593         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2594         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2595         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2596
2597         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2598
2599         /* The following configuration is needed only once per QMAN */
2600         if (qman_id == 0) {
2601                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2602                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2603                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2604
2605                 /* Configure RAZWI IRQ */
2606                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2607                 if (hdev->stop_on_err)
2608                         dma_qm_err_cfg |=
2609                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2610
2611                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2612
2613                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2614                         lower_32_bits(CFG_BASE + irq_handler_offset));
2615                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2616                         upper_32_bits(CFG_BASE + irq_handler_offset));
2617
2618                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2619                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2620                                                                         dma_id);
2621
2622                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2623                                 QM_ARB_ERR_MSG_EN_MASK);
2624
2625                 /* Increase ARB WDT to support streams architecture */
2626                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2627                                 GAUDI_ARB_WDT_TIMEOUT);
2628
2629                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2630                                 QMAN_EXTERNAL_MAKE_TRUSTED);
2631
2632                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2633         }
2634 }
2635
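/*
 * Configure the DMA core engine itself (as opposed to its QMAN): read
 * outstanding limits, error message reporting towards the IRQ handler, MMU
 * bypass for the secured-channel case, and finally enabling of the core.
 */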
2636 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2637 {
2638         struct cpu_dyn_regs *dyn_regs =
2639                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2640         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2641         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2642         u32 irq_handler_offset;
2643
2644         /* Set to maximum possible according to physical size */
2645         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2646         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2647
2648         /* WA for H/W bug H3-2116 */
2649         WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2650
2651         /* STOP_ON bit means the operation gets no completion in case of RAZWI */
2652         if (hdev->stop_on_err)
2653                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2654
2655         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2656
2657         irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2658                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2659                         le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2660
2661         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2662                 lower_32_bits(CFG_BASE + irq_handler_offset));
2663         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2664                 upper_32_bits(CFG_BASE + irq_handler_offset));
2665
2666         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2667                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2668         WREG32(mmDMA0_CORE_PROT + dma_offset,
2669                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2670         /* If the channel is secured, it should be in MMU bypass mode */
2671         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2672                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2673         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2674 }
2675
2676 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2677                                 u32 enable_mask)
2678 {
2679         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2680
2681         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2682 }
2683
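/*
 * Initialize all PCI DMA QMANs: assign a completion queue and MSI vector to
 * each stream, program the QMAN and the DMA core, and record the capability
 * bit so the initialization is done only once.
 */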
2684 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2685 {
2686         struct gaudi_device *gaudi = hdev->asic_specific;
2687         struct hl_hw_queue *q;
2688         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2689
2690         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2691                 return;
2692
2693         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2694                 dma_id = gaudi_dma_assignment[i];
2695                 /*
2696                  * For queues after the CPU Q, we need to add 1 to get the
2697                  * correct queue index. In addition, we need to add the CPU EQ
2698                  * and the NIC IRQs in order to get the correct MSI register.
2699                  */
2700                 if (dma_id > 1) {
2701                         cpu_skip = 1;
2702                         nic_skip = NIC_NUMBER_OF_ENGINES;
2703                 } else {
2704                         cpu_skip = 0;
2705                         nic_skip = 0;
2706                 }
2707
2708                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2709                         q_idx = 4 * dma_id + j + cpu_skip;
2710                         q = &hdev->kernel_queues[q_idx];
2711                         q->cq_id = cq_id++;
2712                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2713                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
2714                                                 q->bus_address);
2715                 }
2716
2717                 gaudi_init_dma_core(hdev, dma_id);
2718
2719                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2720         }
2721
2722         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2723 }
2724
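/*
 * Configure one stream of an HBM DMA QMAN. Streams 0-3 program the upper CP
 * PQ from the given base address, while qman_id 4 configures the lower CP,
 * which also carries the error reporting and arbiter settings for the whole
 * QMAN.
 */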
2725 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2726                                         int qman_id, u64 qman_base_addr)
2727 {
2728         struct cpu_dyn_regs *dyn_regs =
2729                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2730         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2731         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2732         u32 dma_qm_err_cfg, irq_handler_offset;
2733         u32 q_off, dma_qm_offset;
2734
2735         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2736
2737         mtr_base_en_lo = lower_32_bits(CFG_BASE +
2738                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2739         mtr_base_en_hi = upper_32_bits(CFG_BASE +
2740                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2741         so_base_en_lo = lower_32_bits(CFG_BASE +
2742                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2743         so_base_en_hi = upper_32_bits(CFG_BASE +
2744                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2745         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2746                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2747         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2748                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2749         so_base_ws_lo = lower_32_bits(CFG_BASE +
2750                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2751         so_base_ws_hi = upper_32_bits(CFG_BASE +
2752                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2753
2754         q_off = dma_qm_offset + qman_id * 4;
2755
2756         if (qman_id < 4) {
2757                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2758                                         lower_32_bits(qman_base_addr));
2759                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2760                                         upper_32_bits(qman_base_addr));
2761
2762                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2763                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2764                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2765
2766                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2767                                                         QMAN_CPDMA_SIZE_OFFSET);
2768                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2769                                                         QMAN_CPDMA_SRC_OFFSET);
2770                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2771                                                         QMAN_CPDMA_DST_OFFSET);
2772         } else {
2773                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2774                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2775                                 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2776
2777                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2778                                                         QMAN_LDMA_SIZE_OFFSET);
2779                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2780                                                         QMAN_LDMA_SRC_OFFSET);
2781                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2782                                                         QMAN_LDMA_DST_OFFSET);
2783
2784                 /* Configure RAZWI IRQ */
2785                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2786                 if (hdev->stop_on_err)
2787                         dma_qm_err_cfg |=
2788                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2789
2790                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2791
2792                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2793                         lower_32_bits(CFG_BASE + irq_handler_offset));
2794                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2795                         upper_32_bits(CFG_BASE + irq_handler_offset));
2796
2797                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2798                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2799                                                                         dma_id);
2800
2801                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2802                                 QM_ARB_ERR_MSG_EN_MASK);
2803
2804                 /* Increase ARB WDT to support streams architecture */
2805                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2806                                 GAUDI_ARB_WDT_TIMEOUT);
2807
2808                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2809                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2810                                 QMAN_INTERNAL_MAKE_TRUSTED);
2811         }
2812
2813         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2814         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2815         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2816         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2817
2818         /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2819         if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2820                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2821                                 mtr_base_ws_lo);
2822                 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2823                                 mtr_base_ws_hi);
2824                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2825                                 so_base_ws_lo);
2826                 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2827                                 so_base_ws_hi);
2828         }
2829 }
2830
2831 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2832 {
2833         struct gaudi_device *gaudi = hdev->asic_specific;
2834         struct gaudi_internal_qman_info *q;
2835         u64 qman_base_addr;
2836         int i, j, dma_id, internal_q_index;
2837
2838         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2839                 return;
2840
2841         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2842                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2843
2844                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2845                          /*
2846                           * Add one for the CPU queue in order to get the correct
2847                           * queue index, as all internal queues are placed after it
2848                           */
2849                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2850
2851                         q = &gaudi->internal_qmans[internal_q_index];
2852                         qman_base_addr = (u64) q->pq_dma_addr;
2853                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2854                                                 qman_base_addr);
2855                 }
2856
2857                 /* Initializing lower CP for HBM DMA QMAN */
2858                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2859
2860                 gaudi_init_dma_core(hdev, dma_id);
2861
2862                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2863         }
2864
2865         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2866 }
2867
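/*
 * Configure one stream of an MME QMAN. As with the internal DMA QMANs,
 * streams 0-3 program the upper CP PQ while qman_id 4 configures the lower
 * CP together with the RAZWI/arbiter error reporting.
 */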
2868 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2869                                         int qman_id, u64 qman_base_addr)
2870 {
2871         struct cpu_dyn_regs *dyn_regs =
2872                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2873         u32 mtr_base_lo, mtr_base_hi;
2874         u32 so_base_lo, so_base_hi;
2875         u32 irq_handler_offset;
2876         u32 q_off, mme_id;
2877         u32 mme_qm_err_cfg;
2878
2879         mtr_base_lo = lower_32_bits(CFG_BASE +
2880                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2881         mtr_base_hi = upper_32_bits(CFG_BASE +
2882                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2883         so_base_lo = lower_32_bits(CFG_BASE +
2884                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2885         so_base_hi = upper_32_bits(CFG_BASE +
2886                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2887
2888         q_off = mme_offset + qman_id * 4;
2889
2890         if (qman_id < 4) {
2891                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2892                                         lower_32_bits(qman_base_addr));
2893                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2894                                         upper_32_bits(qman_base_addr));
2895
2896                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2897                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2898                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2899
2900                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2901                                                         QMAN_CPDMA_SIZE_OFFSET);
2902                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2903                                                         QMAN_CPDMA_SRC_OFFSET);
2904                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2905                                                         QMAN_CPDMA_DST_OFFSET);
2906         } else {
2907                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2908                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2909                                 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2910
2911                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2912                                                         QMAN_LDMA_SIZE_OFFSET);
2913                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2914                                                         QMAN_LDMA_SRC_OFFSET);
2915                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2916                                                         QMAN_LDMA_DST_OFFSET);
2917
2918                 /* Configure RAZWI IRQ */
2919                 mme_id = mme_offset /
2920                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2921
2922                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2923                 if (hdev->stop_on_err)
2924                         mme_qm_err_cfg |=
2925                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2926
2927                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2928
2929                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2930                         lower_32_bits(CFG_BASE + irq_handler_offset));
2931                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2932                         upper_32_bits(CFG_BASE + irq_handler_offset));
2933
2934                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2935                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2936                                                                         mme_id);
2937
2938                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2939                                 QM_ARB_ERR_MSG_EN_MASK);
2940
2941                 /* Increase ARB WDT to support streams architecture */
2942                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2943                                 GAUDI_ARB_WDT_TIMEOUT);
2944
2945                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2946                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2947                                 QMAN_INTERNAL_MAKE_TRUSTED);
2948         }
2949
2950         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2951         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2952         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2953         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2954 }
2955
2956 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2957 {
2958         struct gaudi_device *gaudi = hdev->asic_specific;
2959         struct gaudi_internal_qman_info *q;
2960         u64 qman_base_addr;
2961         u32 mme_offset;
2962         int i, internal_q_index;
2963
2964         if (gaudi->hw_cap_initialized & HW_CAP_MME)
2965                 return;
2966
2967         /*
2968          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2969          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2970          */
2971
2972         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2973
2974         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2975                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2976                 q = &gaudi->internal_qmans[internal_q_index];
2977                 qman_base_addr = (u64) q->pq_dma_addr;
2978                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2979                                         qman_base_addr);
2980                 if (i == 3)
2981                         mme_offset = 0;
2982         }
2983
2984         /* Initializing lower CP for MME QMANs */
2985         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2986         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2987         gaudi_init_mme_qman(hdev, 0, 4, 0);
2988
2989         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2990         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2991
2992         gaudi->hw_cap_initialized |= HW_CAP_MME;
2993 }
2994
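/*
 * Configure one stream of a TPC QMAN, following the same upper/lower CP
 * split as the other internal QMANs. The west-south sync manager message
 * bases are programmed only for the TPC used by the sync stream collective.
 */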
2995 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2996                                 int qman_id, u64 qman_base_addr)
2997 {
2998         struct cpu_dyn_regs *dyn_regs =
2999                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3000         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3001         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3002         u32 tpc_qm_err_cfg, irq_handler_offset;
3003         u32 q_off, tpc_id;
3004
3005         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3006                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3007         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3008                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3009         so_base_en_lo = lower_32_bits(CFG_BASE +
3010                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3011         so_base_en_hi = upper_32_bits(CFG_BASE +
3012                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3013         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3014                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3015         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3016                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3017         so_base_ws_lo = lower_32_bits(CFG_BASE +
3018                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3019         so_base_ws_hi = upper_32_bits(CFG_BASE +
3020                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3021
3022         q_off = tpc_offset + qman_id * 4;
3023
3024         tpc_id = tpc_offset /
3025                         (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3026
3027         if (qman_id < 4) {
3028                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3029                                         lower_32_bits(qman_base_addr));
3030                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3031                                         upper_32_bits(qman_base_addr));
3032
3033                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3034                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3035                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3036
3037                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3038                                                         QMAN_CPDMA_SIZE_OFFSET);
3039                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3040                                                         QMAN_CPDMA_SRC_OFFSET);
3041                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3042                                                         QMAN_CPDMA_DST_OFFSET);
3043         } else {
3044                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3045                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3046                                 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3047
3048                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3049                                                         QMAN_LDMA_SIZE_OFFSET);
3050                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3051                                                         QMAN_LDMA_SRC_OFFSET);
3052                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3053                                                         QMAN_LDMA_DST_OFFSET);
3054
3055                 /* Configure RAZWI IRQ */
3056                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3057                 if (hdev->stop_on_err)
3058                         tpc_qm_err_cfg |=
3059                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3060
3061                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3062
3063                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3064                         lower_32_bits(CFG_BASE + irq_handler_offset));
3065                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3066                         upper_32_bits(CFG_BASE + irq_handler_offset));
3067
3068                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3069                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3070                                                                         tpc_id);
3071
3072                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3073                                 QM_ARB_ERR_MSG_EN_MASK);
3074
3075                 /* Increase ARB WDT to support streams architecture */
3076                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
3077                                 GAUDI_ARB_WDT_TIMEOUT);
3078
3079                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3080                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3081                                 QMAN_INTERNAL_MAKE_TRUSTED);
3082         }
3083
3084         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3085         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3086         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3087         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3088
3089         /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3090         if (tpc_id == 6) {
3091                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3092                                 mtr_base_ws_lo);
3093                 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3094                                 mtr_base_ws_hi);
3095                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3096                                 so_base_ws_lo);
3097                 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3098                                 so_base_ws_hi);
3099         }
3100 }
3101
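/*
 * Initialize all TPC QMANs: program the four upper-CP streams and the lower
 * CP of each TPC, enable the QMAN, set the sync manager base address high
 * part in the TPC config space and mark the per-TPC capability bits.
 */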
3102 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3103 {
3104         struct gaudi_device *gaudi = hdev->asic_specific;
3105         struct gaudi_internal_qman_info *q;
3106         u64 qman_base_addr;
3107         u32 so_base_hi, tpc_offset = 0;
3108         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3109                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3110         int i, tpc_id, internal_q_index;
3111
3112         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3113                 return;
3114
3115         so_base_hi = upper_32_bits(CFG_BASE +
3116                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3117
3118         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3119                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3120                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3121                                                 tpc_id * QMAN_STREAMS + i;
3122                         q = &gaudi->internal_qmans[internal_q_index];
3123                         qman_base_addr = (u64) q->pq_dma_addr;
3124                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
3125                                                 qman_base_addr);
3126
3127                         if (i == 3) {
3128                                 /* Initializing lower CP for TPC QMAN */
3129                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3130
3131                                 /* Enable the QMAN and TPC channel */
3132                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3133                                                 QMAN_TPC_ENABLE);
3134                         }
3135                 }
3136
3137                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3138                                 so_base_hi);
3139
3140                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3141
3142                 gaudi->hw_cap_initialized |=
3143                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3144         }
3145 }
3146
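/*
 * Configure one stream of a NIC QMAN. All four streams program a PQ and the
 * sync manager message bases; the error reporting, arbiter watchdog and
 * protection settings are programmed once per QMAN, on stream 0.
 */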
3147 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3148                                 int qman_id, u64 qman_base_addr, int nic_id)
3149 {
3150         struct cpu_dyn_regs *dyn_regs =
3151                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3152         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3153         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3154         u32 nic_qm_err_cfg, irq_handler_offset;
3155         u32 q_off;
3156
3157         mtr_base_en_lo = lower_32_bits(CFG_BASE +
3158                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3159         mtr_base_en_hi = upper_32_bits(CFG_BASE +
3160                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3161         so_base_en_lo = lower_32_bits(CFG_BASE +
3162                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3163         so_base_en_hi = upper_32_bits(CFG_BASE +
3164                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3165         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3166                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3167         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3168                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3169         so_base_ws_lo = lower_32_bits(CFG_BASE +
3170                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3171         so_base_ws_hi = upper_32_bits(CFG_BASE +
3172                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3173
3174         q_off = nic_offset + qman_id * 4;
3175
3176         WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3177         WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3178
3179         WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3180         WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3181         WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3182
3183         WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3184                                                         QMAN_LDMA_SIZE_OFFSET);
3185         WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3186                                                         QMAN_LDMA_SRC_OFFSET);
3187         WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3188                                                         QMAN_LDMA_DST_OFFSET);
3189
3190         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3191         WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3192         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3193         WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3194
3195         /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3196         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3197         WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3198         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3199         WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3200
3201         if (qman_id == 0) {
3202                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3203                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3204                                 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3205
3206                 /* Configure RAZWI IRQ */
3207                 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3208                 if (hdev->stop_on_err)
3209                         nic_qm_err_cfg |=
3210                                 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3211
3212                 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3213
3214                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3215                         lower_32_bits(CFG_BASE + irq_handler_offset));
3216                 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3217                         upper_32_bits(CFG_BASE + irq_handler_offset));
3218
3219                 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3220                         gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3221                                                                         nic_id);
3222
3223                 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3224                                 QM_ARB_ERR_MSG_EN_MASK);
3225
3226                 /* Increase ARB WDT to support streams architecture */
3227                 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset,
3228                                 GAUDI_ARB_WDT_TIMEOUT);
3229
3230                 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3231                 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3232                                 QMAN_INTERNAL_MAKE_TRUSTED);
3233         }
3234 }
3235
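/*
 * Initialize the QMANs of all NIC ports that are enabled in nic_ports_mask.
 * Each NIC block holds two QMANs, so the register offset advances by one
 * QMAN per port and jumps to the next NIC block after every odd port.
 */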
3236 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3237 {
3238         struct gaudi_device *gaudi = hdev->asic_specific;
3239         struct gaudi_internal_qman_info *q;
3240         u64 qman_base_addr;
3241         u32 nic_offset = 0;
3242         u32 nic_delta_between_qmans =
3243                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3244         u32 nic_delta_between_nics =
3245                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3246         int i, nic_id, internal_q_index;
3247
3248         if (!hdev->nic_ports_mask)
3249                 return;
3250
3251         if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3252                 return;
3253
3254         dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3255
3256         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3257                 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3258                         nic_offset += nic_delta_between_qmans;
3259                         if (nic_id & 1) {
3260                                 nic_offset -= (nic_delta_between_qmans * 2);
3261                                 nic_offset += nic_delta_between_nics;
3262                         }
3263                         continue;
3264                 }
3265
3266                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3267                         internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3268                                                 nic_id * QMAN_STREAMS + i;
3269                         q = &gaudi->internal_qmans[internal_q_index];
3270                         qman_base_addr = (u64) q->pq_dma_addr;
3271                         gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3272                                                 qman_base_addr, nic_id);
3273                 }
3274
3275                 /* Enable the QMAN */
3276                 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3277
3278                 nic_offset += nic_delta_between_qmans;
3279                 if (nic_id & 1) {
3280                         nic_offset -= (nic_delta_between_qmans * 2);
3281                         nic_offset += nic_delta_between_nics;
3282                 }
3283
3284                 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3285         }
3286 }
3287
3288 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3289 {
3290         struct gaudi_device *gaudi = hdev->asic_specific;
3291
3292         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3293                 return;
3294
3295         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3296         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3297         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3298 }
3299
3300 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3301 {
3302         struct gaudi_device *gaudi = hdev->asic_specific;
3303
3304         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3305                 return;
3306
3307         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3308         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3309         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3310         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3311         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3312 }
3313
3314 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3315 {
3316         struct gaudi_device *gaudi = hdev->asic_specific;
3317
3318         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3319                 return;
3320
3321         WREG32(mmMME2_QM_GLBL_CFG0, 0);
3322         WREG32(mmMME0_QM_GLBL_CFG0, 0);
3323 }
3324
3325 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3326 {
3327         struct gaudi_device *gaudi = hdev->asic_specific;
3328         u32 tpc_offset = 0;
3329         int tpc_id;
3330
3331         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3332                 return;
3333
3334         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3335                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3336                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3337         }
3338 }
3339
3340 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3341 {
3342         struct gaudi_device *gaudi = hdev->asic_specific;
3343         u32 nic_mask, nic_offset = 0;
3344         u32 nic_delta_between_qmans =
3345                         mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3346         u32 nic_delta_between_nics =
3347                         mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3348         int nic_id;
3349
3350         for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3351                 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3352
3353                 if (gaudi->hw_cap_initialized & nic_mask)
3354                         WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3355
3356                 nic_offset += nic_delta_between_qmans;
3357                 if (nic_id & 1) {
3358                         nic_offset -= (nic_delta_between_qmans * 2);
3359                         nic_offset += nic_delta_between_nics;
3360                 }
3361         }
3362 }
3363
3364 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3365 {
3366         struct gaudi_device *gaudi = hdev->asic_specific;
3367
3368         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3369                 return;
3370
3371         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3372         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3373         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3374         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3375 }
3376
3377 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3378 {
3379         struct gaudi_device *gaudi = hdev->asic_specific;
3380
3381         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3382                 return;
3383
3384         /* Stop CPs of HBM DMA QMANs */
3385
3386         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3387         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3388         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3389         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3390         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3391 }
3392
3393 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3394 {
3395         struct gaudi_device *gaudi = hdev->asic_specific;
3396
3397         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3398                 return;
3399
3400         /* Stop CPs of MME QMANs */
3401         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3402         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3403 }
3404
3405 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3406 {
3407         struct gaudi_device *gaudi = hdev->asic_specific;
3408
3409         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3410                 return;
3411
3412         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3416         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3417         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3418         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3419         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3420 }
3421
3422 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3423 {
3424         struct gaudi_device *gaudi = hdev->asic_specific;
3425
3426         /* Stop upper CPs of QMANs */
3427
3428         if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3429                 WREG32(mmNIC0_QM0_GLBL_CFG1,
3430                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3431                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3432                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3433
3434         if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3435                 WREG32(mmNIC0_QM1_GLBL_CFG1,
3436                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3437                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3438                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3439
3440         if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3441                 WREG32(mmNIC1_QM0_GLBL_CFG1,
3442                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3443                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3444                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3445
3446         if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3447                 WREG32(mmNIC1_QM1_GLBL_CFG1,
3448                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3449                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3450                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3451
3452         if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3453                 WREG32(mmNIC2_QM0_GLBL_CFG1,
3454                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3455                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3456                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3457
3458         if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3459                 WREG32(mmNIC2_QM1_GLBL_CFG1,
3460                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3461                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3462                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3463
3464         if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3465                 WREG32(mmNIC3_QM0_GLBL_CFG1,
3466                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3467                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3468                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3469
3470         if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3471                 WREG32(mmNIC3_QM1_GLBL_CFG1,
3472                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3473                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3474                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3475
3476         if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3477                 WREG32(mmNIC4_QM0_GLBL_CFG1,
3478                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3479                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3480                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3481
3482         if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3483                 WREG32(mmNIC4_QM1_GLBL_CFG1,
3484                                 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3485                                 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3486                                 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3487 }
3488
3489 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3490 {
3491         struct gaudi_device *gaudi = hdev->asic_specific;
3492
3493         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3494                 return;
3495
3496         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3497         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3498         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3499 }
3500
3501 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3502 {
3503         struct gaudi_device *gaudi = hdev->asic_specific;
3504
3505         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3506                 return;
3507
3508         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3509         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3510         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3511         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3512         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3513 }
3514
3515 static void gaudi_mme_stall(struct hl_device *hdev)
3516 {
3517         struct gaudi_device *gaudi = hdev->asic_specific;
3518
3519         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3520                 return;
3521
3522         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3523         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3524         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3525         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3526         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3527         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3528         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3529         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3530         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3531         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3532         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3533         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3534         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3535         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3536         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3537         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3538         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3539 }
3540
3541 static void gaudi_tpc_stall(struct hl_device *hdev)
3542 {
3543         struct gaudi_device *gaudi = hdev->asic_specific;
3544
3545         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3546                 return;
3547
3548         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3552         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3553         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3554         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3555         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3556 }
3557
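/*
 * Enable or disable clock gating per engine according to
 * hdev->clock_gating_mask. Skipped entirely during debug sessions and when
 * the firmware owns the security configuration.
 */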
3558 static void gaudi_set_clock_gating(struct hl_device *hdev)
3559 {
3560         struct gaudi_device *gaudi = hdev->asic_specific;
3561         u32 qman_offset;
3562         bool enable;
3563         int i;
3564
3565         /* If we are in the middle of a debug session, don't enable clock
3566          * gating as it may interfere
3567          */
3568         if (hdev->in_debug)
3569                 return;
3570
3571         if (hdev->asic_prop.fw_security_enabled)
3572                 return;
3573
3574         for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
3575                 enable = !!(hdev->clock_gating_mask &
3576                                 (BIT_ULL(gaudi_dma_assignment[i])));
3577
3578                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3579                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3580                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3581                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3582                                 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
3583         }
3584
3585         for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
3586                 enable = !!(hdev->clock_gating_mask &
3587                                 (BIT_ULL(gaudi_dma_assignment[i])));
3588
3589                 /* GC sends work to the DMA engine through the upper CP in DMA5,
3590                  * so we must not enable clock gating for that DMA
3591                  */
3592                 if (i == GAUDI_HBM_DMA_4)
3593                         enable = 0;
3594
3595                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
3596                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
3597                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3598                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
3599                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3600         }
3601
3602         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
3603         WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3604         WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3605
3606         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
3607         WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3608         WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3609
3610         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3611                 enable = !!(hdev->clock_gating_mask &
3612                                 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
3613
3614                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
3615                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
3616                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
3617                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
3618
3619                 qman_offset += TPC_QMAN_OFFSET;
3620         }
3621
3622         gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
3623 }
3624
3625 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3626 {
3627         struct gaudi_device *gaudi = hdev->asic_specific;
3628         u32 qman_offset;
3629         int i;
3630
3631         if (hdev->asic_prop.fw_security_enabled)
3632                 return;
3633
3634         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3635                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3636                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3637
3638                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3639         }
3640
3641         WREG32(mmMME0_QM_CGM_CFG, 0);
3642         WREG32(mmMME0_QM_CGM_CFG1, 0);
3643         WREG32(mmMME2_QM_CGM_CFG, 0);
3644         WREG32(mmMME2_QM_CGM_CFG1, 0);
3645
3646         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3647                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3648                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3649
3650                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3651         }
3652
3653         gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
3654 }
3655
3656 static void gaudi_enable_timestamp(struct hl_device *hdev)
3657 {
3658         /* Disable the timestamp counter */
3659         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3660
3661         /* Zero the lower/upper parts of the 64-bit counter */
3662         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3663         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3664
3665         /* Enable the counter */
3666         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3667 }
3668
3669 static void gaudi_disable_timestamp(struct hl_device *hdev)
3670 {
3671         /* Disable the timestamp counter */
3672         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3673 }
3674
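/*
 * Halt order: first stop the QMAN command parsers, then disable clock
 * gating, stall the engines themselves, and only then disable the QMANs,
 * the timestamp counter and MSI interrupts.
 */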
3675 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
3676 {
3677         u32 wait_timeout_ms;
3678
3679         dev_info(hdev->dev,
3680                 "Halting compute engines and disabling interrupts\n");
3681
3682         if (hdev->pldm)
3683                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3684         else
3685                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3686
3687         gaudi_stop_nic_qmans(hdev);
3688         gaudi_stop_mme_qmans(hdev);
3689         gaudi_stop_tpc_qmans(hdev);
3690         gaudi_stop_hbm_dma_qmans(hdev);
3691         gaudi_stop_pci_dma_qmans(hdev);
3692
3693         hdev->asic_funcs->disable_clock_gating(hdev);
3694
3695         msleep(wait_timeout_ms);
3696
3697         gaudi_pci_dma_stall(hdev);
3698         gaudi_hbm_dma_stall(hdev);
3699         gaudi_tpc_stall(hdev);
3700         gaudi_mme_stall(hdev);
3701
3702         msleep(wait_timeout_ms);
3703
3704         gaudi_disable_nic_qmans(hdev);
3705         gaudi_disable_mme_qmans(hdev);
3706         gaudi_disable_tpc_qmans(hdev);
3707         gaudi_disable_hbm_dma_qmans(hdev);
3708         gaudi_disable_pci_dma_qmans(hdev);
3709
3710         gaudi_disable_timestamp(hdev);
3711
3712         gaudi_disable_msi(hdev);
3713 }
3714
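/*
 * MMU init: program the hop0 page-table address for every ASID, set the
 * cache management page address, invalidate the MMU cache and enable the
 * MMU with the relevant hop configuration.
 */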
3715 static int gaudi_mmu_init(struct hl_device *hdev)
3716 {
3717         struct asic_fixed_properties *prop = &hdev->asic_prop;
3718         struct gaudi_device *gaudi = hdev->asic_specific;
3719         u64 hop0_addr;
3720         int rc, i;
3721
3722         if (!hdev->mmu_enable)
3723                 return 0;
3724
3725         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3726                 return 0;
3727
3728         for (i = 0 ; i < prop->max_asid ; i++) {
3729                 hop0_addr = prop->mmu_pgt_addr +
3730                                 (i * prop->mmu_hop_table_size);
3731
3732                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3733                 if (rc) {
3734                         dev_err(hdev->dev,
3735                                 "failed to set hop0 addr for asid %d\n", i);
3736                         goto err;
3737                 }
3738         }
3739
3740         /* init MMU cache management page address */
3741         WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
3742         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
3743
3744         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
3745
3746         WREG32(mmMMU_UP_MMU_ENABLE, 1);
3747         WREG32(mmMMU_UP_SPI_MASK, 0xF);
3748
3749         WREG32(mmSTLB_HOP_CONFIGURATION,
3750                         hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
3751
3752         /*
3753          * The H/W expects the first PI after init to be 1. After wraparound
3754          * we'll write 0.
3755          */
3756         gaudi->mmu_cache_inv_pi = 1;
3757
3758         gaudi->hw_cap_initialized |= HW_CAP_MMU;
3759
3760         return 0;
3761
3762 err:
3763         return rc;
3764 }
3765
3766 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3767 {
3768         void __iomem *dst;
3769
3770         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3771
3772         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3773 }
3774
3775 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3776 {
3777         void __iomem *dst;
3778
3779         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3780
3781         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3782 }
3783
3784 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3785 {
3786         struct dynamic_fw_load_mgr *dynamic_loader;
3787         struct cpu_dyn_regs *dyn_regs;
3788
3789         dynamic_loader = &hdev->fw_loader.dynamic_loader;
3790
3791         /*
3792          * Here we set initial values for a few specific dynamic regs, since
3793          * before the first descriptor is read from the FW these values have
3794          * to be hard-coded. In later stages of the protocol they are updated
3795          * automatically from the FW descriptor, so the data there is always
3796          * up-to-date.
3797          */
3798         dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3799         dyn_regs->kmd_msg_to_cpu =
3800                                 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3801         dyn_regs->cpu_cmd_status_to_host =
3802                                 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3803
3804         dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3805 }
3806
3807 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3808 {
3809         struct static_fw_load_mgr *static_loader;
3810
3811         static_loader = &hdev->fw_loader.static_loader;
3812
3813         static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3814         static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3815         static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3816         static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3817         static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3818         static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3819         static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3820         static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3821         static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3822         static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3823         static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3824         static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3825         static_loader->cpu_reset_wait_msec = hdev->pldm ?
3826                         GAUDI_PLDM_RESET_WAIT_MSEC :
3827                         GAUDI_CPU_RESET_WAIT_MSEC;
3828 }
3829
3830 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3831 {
3832         struct asic_fixed_properties *prop = &hdev->asic_prop;
3833         struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3834
3835         /* fill common fields */
3836         fw_loader->linux_loaded = false;
3837         fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3838         fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3839         fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3840         fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3841         fw_loader->skip_bmc = !hdev->bmc_enable;
3842         fw_loader->sram_bar_id = SRAM_BAR_ID;
3843         fw_loader->dram_bar_id = HBM_BAR_ID;
3844
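        /* Pick the descriptor-based dynamic loader or the legacy static
         * register map, depending on the dynamic_fw_load property.
         */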
3845         if (prop->dynamic_fw_load)
3846                 gaudi_init_dynamic_firmware_loader(hdev);
3847         else
3848                 gaudi_init_static_firmware_loader(hdev);
3849 }
3850
3851 static int gaudi_init_cpu(struct hl_device *hdev)
3852 {
3853         struct gaudi_device *gaudi = hdev->asic_specific;
3854         int rc;
3855
3856         if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3857                 return 0;
3858
3859         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3860                 return 0;
3861
3862         /*
3863          * The device CPU works with 40 bits addresses.
3864          * This register sets the extension to 50 bits.
3865          */
3866         if (!hdev->asic_prop.fw_security_enabled)
3867                 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3868
3869         rc = hl_fw_init_cpu(hdev);
3870
3871         if (rc)
3872                 return rc;
3873
3874         gaudi->hw_cap_initialized |= HW_CAP_CPU;
3875
3876         return 0;
3877 }
3878
3879 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3880 {
3881         struct cpu_dyn_regs *dyn_regs =
3882                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3883         struct asic_fixed_properties *prop = &hdev->asic_prop;
3884         struct gaudi_device *gaudi = hdev->asic_specific;
3885         u32 status, irq_handler_offset;
3886         struct hl_eq *eq;
3887         struct hl_hw_queue *cpu_pq =
3888                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3889         int err;
3890
3891         if (!hdev->cpu_queues_enable)
3892                 return 0;
3893
3894         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3895                 return 0;
3896
3897         eq = &hdev->event_queue;
3898
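        /*
         * Expose the PQ, EQ and CPU-accessible (CQ) memory areas to the
         * device CPU by programming their bus addresses and sizes into the
         * CPU_IF registers.
         */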
3899         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3900         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3901
3902         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3903         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3904
3905         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3906                         lower_32_bits(hdev->cpu_accessible_dma_address));
3907         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3908                         upper_32_bits(hdev->cpu_accessible_dma_address));
3909
3910         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3911         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3912         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3913
3914         /* Used for EQ CI */
3915         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3916
3917         WREG32(mmCPU_IF_PF_PQ_PI, 0);
3918
3919         if (gaudi->multi_msi_mode)
3920                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3921         else
3922                 WREG32(mmCPU_IF_QUEUE_INIT,
3923                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3924
3925         irq_handler_offset = prop->gic_interrupts_enable ?
3926                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3927                         le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3928
3929         WREG32(irq_handler_offset,
3930                 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3931
3932         err = hl_poll_timeout(
3933                 hdev,
3934                 mmCPU_IF_QUEUE_INIT,
3935                 status,
3936                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3937                 1000,
3938                 cpu_timeout);
3939
3940         if (err) {
3941                 dev_err(hdev->dev,
3942                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3943                 return -EIO;
3944         }
3945
3946         /* update FW application security bits */
3947         if (prop->fw_cpu_boot_dev_sts0_valid)
3948                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3949         if (prop->fw_cpu_boot_dev_sts1_valid)
3950                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3951
3952         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3953         return 0;
3954 }
3955
3956 static void gaudi_pre_hw_init(struct hl_device *hdev)
3957 {
3958         /* Perform read from the device to make sure device is up */
3959         RREG32(mmHW_STATE);
3960
3961         if (!hdev->asic_prop.fw_security_enabled) {
3962                 /* Set the access through PCI bars (Linux driver only) as
3963                  * secured
3964                  */
3965                 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3966                                 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3967                                 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3968
3969                 /* Perform read to flush the waiting writes to ensure
3970                  * configuration was set in the device
3971                  */
3972                 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3973         }
3974
3975         /*
3976          * Let's mark in the H/W that we have reached this point. We check
3977          * this value in the reset_before_init function to understand whether
3978          * we need to reset the chip before doing H/W init. This register is
3979          * cleared by the H/W upon H/W reset
3980          */
3981         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3982 }
3983
3984 static int gaudi_hw_init(struct hl_device *hdev)
3985 {
3986         struct gaudi_device *gaudi = hdev->asic_specific;
3987         int rc;
3988
3989         gaudi_pre_hw_init(hdev);
3990
3991         /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3992          * So we set it here and if anyone tries to move it later to
3993          * a different address, there will be an error
3994          */
3995         if (hdev->asic_prop.iatu_done_by_fw)
3996                 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3997
3998         /*
3999          * Before pushing u-boot/Linux to the device, we need to set the HBM
4000          * BAR to the base address of the DRAM
4001          */
4002         if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
4003                 dev_err(hdev->dev,
4004                         "failed to map HBM bar to DRAM base address\n");
4005                 return -EIO;
4006         }
4007
4008         rc = gaudi_init_cpu(hdev);
4009         if (rc) {
4010                 dev_err(hdev->dev, "failed to initialize CPU\n");
4011                 return rc;
4012         }
4013
4014         /* In case clock gating was enabled in preboot, we need to disable it
4015          * here before touching the MME/TPC registers.
4016          * There is no need to take the clock gating mutex because when this
4017          * function runs, no other relevant code can run
4018          */
4019         hdev->asic_funcs->disable_clock_gating(hdev);
4020
4021         /* SRAM scrambler must be initialized after CPU is running from HBM */
4022         gaudi_init_scrambler_sram(hdev);
4023
4024         /* This is here just in case we are working without CPU */
4025         gaudi_init_scrambler_hbm(hdev);
4026
4027         gaudi_init_golden_registers(hdev);
4028
4029         rc = gaudi_mmu_init(hdev);
4030         if (rc)
4031                 return rc;
4032
4033         gaudi_init_security(hdev);
4034
4035         gaudi_init_pci_dma_qmans(hdev);
4036
4037         gaudi_init_hbm_dma_qmans(hdev);
4038
4039         gaudi_init_mme_qmans(hdev);
4040
4041         gaudi_init_tpc_qmans(hdev);
4042
4043         gaudi_init_nic_qmans(hdev);
4044
4045         hdev->asic_funcs->set_clock_gating(hdev);
4046
4047         gaudi_enable_timestamp(hdev);
4048
4049         /* MSI must be enabled before CPU queues and NIC are initialized */
4050         rc = gaudi_enable_msi(hdev);
4051         if (rc)
4052                 goto disable_queues;
4053
4054         /* must be called after MSI was enabled */
4055         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
4056         if (rc) {
4057                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
4058                         rc);
4059                 goto disable_msi;
4060         }
4061
4062         /* Perform read from the device to flush all configuration */
4063         RREG32(mmHW_STATE);
4064
4065         return 0;
4066
4067 disable_msi:
4068         gaudi_disable_msi(hdev);
4069 disable_queues:
4070         gaudi_disable_mme_qmans(hdev);
4071         gaudi_disable_pci_dma_qmans(hdev);
4072
4073         return rc;
4074 }
4075
4076 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
4077 {
4078         struct cpu_dyn_regs *dyn_regs =
4079                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4080         u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4081         struct gaudi_device *gaudi = hdev->asic_specific;
4082         bool driver_performs_reset;
4083
4084         if (!hard_reset) {
4085                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4086                 return;
4087         }
4088
4089         if (hdev->pldm) {
4090                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4091                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4092         } else {
4093                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4094                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4095         }
4096
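        /* The driver asserts the reset itself only when FW security is
         * disabled and the FW does not perform the hard reset on our behalf.
         */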
4097         driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4098                                         !hdev->asic_prop.hard_reset_done_by_fw);
4099
4100         /* Set device to handle FLR by H/W as we will put the device CPU to
4101          * halt mode
4102          */
4103         if (driver_performs_reset)
4104                 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4105                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4106
4107         /* If Linux is loaded on the device CPU we need to communicate with it
4108          * via the GIC. Otherwise, we use COMMS, or the MSG_TO_CPU registers
4109          * in case of old F/Ws
4110          */
4111         if (hdev->fw_loader.linux_loaded) {
4112                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4113                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4114                                 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4115
4116                 WREG32(irq_handler_offset,
4117                         gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4118         } else {
4119                 if (hdev->asic_prop.hard_reset_done_by_fw)
4120                         hl_fw_ask_hard_reset_without_linux(hdev);
4121                 else
4122                         hl_fw_ask_halt_machine_without_linux(hdev);
4123         }
4124
4125         if (driver_performs_reset) {
4126
4127                 /* Configure the reset registers. Must be done as early as
4128                  * possible in case we fail during H/W initialization
4129                  */
4130                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4131                                                 (CFG_RST_H_DMA_MASK |
4132                                                 CFG_RST_H_MME_MASK |
4133                                                 CFG_RST_H_SM_MASK |
4134                                                 CFG_RST_H_TPC_7_MASK));
4135
4136                 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4137
4138                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4139                                                 (CFG_RST_H_HBM_MASK |
4140                                                 CFG_RST_H_TPC_7_MASK |
4141                                                 CFG_RST_H_NIC_MASK |
4142                                                 CFG_RST_H_SM_MASK |
4143                                                 CFG_RST_H_DMA_MASK |
4144                                                 CFG_RST_H_MME_MASK |
4145                                                 CFG_RST_H_CPU_MASK |
4146                                                 CFG_RST_H_MMU_MASK));
4147
4148                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4149                                                 (CFG_RST_L_IF_MASK |
4150                                                 CFG_RST_L_PSOC_MASK |
4151                                                 CFG_RST_L_TPC_MASK));
4152
4153                 msleep(cpu_timeout_ms);
4154
4155                 /* Tell ASIC not to re-initialize PCIe */
4156                 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4157
4158                 /* Restart BTL/BLR upon hard-reset */
4159                 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4160
4161                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4162                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4163
4164                 dev_info(hdev->dev,
4165                         "Issued HARD reset command, going to wait %dms\n",
4166                         reset_timeout_ms);
4167         } else {
4168                 dev_info(hdev->dev,
4169                         "Firmware performs HARD reset, going to wait %dms\n",
4170                         reset_timeout_ms);
4171         }
4172
4173         /*
4174          * After hard reset, we can't poll the BTM_FSM register because the PSOC
4175          * itself is in reset. Need to wait until the reset is deasserted
4176          */
4177         msleep(reset_timeout_ms);
4178
4179         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4180         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
4181                 dev_err(hdev->dev,
4182                         "Timeout while waiting for device to reset 0x%x\n",
4183                         status);
4184
4185         if (gaudi) {
4186                 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
4187                                 HW_CAP_HBM | HW_CAP_PCI_DMA |
4188                                 HW_CAP_MME | HW_CAP_TPC_MASK |
4189                                 HW_CAP_HBM_DMA | HW_CAP_PLL |
4190                                 HW_CAP_NIC_MASK | HW_CAP_MMU |
4191                                 HW_CAP_SRAM_SCRAMBLER |
4192                                 HW_CAP_HBM_SCRAMBLER |
4193                                 HW_CAP_CLK_GATE);
4194
4195                 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4196
4197                 hdev->device_cpu_is_halted = false;
4198         }
4199 }
4200
4201 static int gaudi_suspend(struct hl_device *hdev)
4202 {
4203         int rc;
4204
4205         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
4206         if (rc)
4207                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4208
4209         return rc;
4210 }
4211
4212 static int gaudi_resume(struct hl_device *hdev)
4213 {
4214         return gaudi_init_iatu(hdev);
4215 }
4216
4217 static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4218                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
4219 {
4220         int rc;
4221
4222         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4223                         VM_DONTCOPY | VM_NORESERVE;
4224
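        /* dma_addr carries the device-side HOST_PHYS_BASE offset; strip it
         * to get back the CPU DMA address that dma_mmap_coherent() expects.
         */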
4225         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4226                                 (dma_addr - HOST_PHYS_BASE), size);
4227         if (rc)
4228                 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4229
4230         return rc;
4231 }
4232
4233 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4234 {
4235         struct cpu_dyn_regs *dyn_regs =
4236                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4237         u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4238         struct gaudi_device *gaudi = hdev->asic_specific;
4239         bool invalid_queue = false;
4240         int dma_id;
4241
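        /*
         * Translate the H/W queue id to its QMAN PQ_PI doorbell register.
         * For the DMA and NIC queue ranges, the per-queue offset is derived
         * from the low two bits of the queue id (each PI register is 4 bytes
         * apart); the other queues map to a fixed register.
         */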
4242         switch (hw_queue_id) {
4243         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4244                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4245                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4246                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4247                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4248                 break;
4249
4250         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4251                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4252                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4253                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4254                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4255                 break;
4256
4257         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4258                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4259                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4260                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4261                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4262                 break;
4263
4264         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4265                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4266                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4267                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4268                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4269                 break;
4270
4271         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4272                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4273                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4274                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4275                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4276                 break;
4277
4278         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4279                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4280                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4281                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4282                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4283                 break;
4284
4285         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4286                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4287                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4288                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4289                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4290                 break;
4291
4292         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4293                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4294                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4295                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4296                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4297                 break;
4298
4299         case GAUDI_QUEUE_ID_CPU_PQ:
4300                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4301                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
4302                 else
4303                         invalid_queue = true;
4304                 break;
4305
4306         case GAUDI_QUEUE_ID_MME_0_0:
4307                 db_reg_offset = mmMME2_QM_PQ_PI_0;
4308                 break;
4309
4310         case GAUDI_QUEUE_ID_MME_0_1:
4311                 db_reg_offset = mmMME2_QM_PQ_PI_1;
4312                 break;
4313
4314         case GAUDI_QUEUE_ID_MME_0_2:
4315                 db_reg_offset = mmMME2_QM_PQ_PI_2;
4316                 break;
4317
4318         case GAUDI_QUEUE_ID_MME_0_3:
4319                 db_reg_offset = mmMME2_QM_PQ_PI_3;
4320                 break;
4321
4322         case GAUDI_QUEUE_ID_MME_1_0:
4323                 db_reg_offset = mmMME0_QM_PQ_PI_0;
4324                 break;
4325
4326         case GAUDI_QUEUE_ID_MME_1_1:
4327                 db_reg_offset = mmMME0_QM_PQ_PI_1;
4328                 break;
4329
4330         case GAUDI_QUEUE_ID_MME_1_2:
4331                 db_reg_offset = mmMME0_QM_PQ_PI_2;
4332                 break;
4333
4334         case GAUDI_QUEUE_ID_MME_1_3:
4335                 db_reg_offset = mmMME0_QM_PQ_PI_3;
4336                 break;
4337
4338         case GAUDI_QUEUE_ID_TPC_0_0:
4339                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4340                 break;
4341
4342         case GAUDI_QUEUE_ID_TPC_0_1:
4343                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4344                 break;
4345
4346         case GAUDI_QUEUE_ID_TPC_0_2:
4347                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4348                 break;
4349
4350         case GAUDI_QUEUE_ID_TPC_0_3:
4351                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4352                 break;
4353
4354         case GAUDI_QUEUE_ID_TPC_1_0:
4355                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4356                 break;
4357
4358         case GAUDI_QUEUE_ID_TPC_1_1:
4359                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4360                 break;
4361
4362         case GAUDI_QUEUE_ID_TPC_1_2:
4363                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4364                 break;
4365
4366         case GAUDI_QUEUE_ID_TPC_1_3:
4367                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4368                 break;
4369
4370         case GAUDI_QUEUE_ID_TPC_2_0:
4371                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4372                 break;
4373
4374         case GAUDI_QUEUE_ID_TPC_2_1:
4375                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4376                 break;
4377
4378         case GAUDI_QUEUE_ID_TPC_2_2:
4379                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4380                 break;
4381
4382         case GAUDI_QUEUE_ID_TPC_2_3:
4383                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4384                 break;
4385
4386         case GAUDI_QUEUE_ID_TPC_3_0:
4387                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4388                 break;
4389
4390         case GAUDI_QUEUE_ID_TPC_3_1:
4391                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4392                 break;
4393
4394         case GAUDI_QUEUE_ID_TPC_3_2:
4395                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4396                 break;
4397
4398         case GAUDI_QUEUE_ID_TPC_3_3:
4399                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4400                 break;
4401
4402         case GAUDI_QUEUE_ID_TPC_4_0:
4403                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4404                 break;
4405
4406         case GAUDI_QUEUE_ID_TPC_4_1:
4407                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4408                 break;
4409
4410         case GAUDI_QUEUE_ID_TPC_4_2:
4411                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4412                 break;
4413
4414         case GAUDI_QUEUE_ID_TPC_4_3:
4415                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4416                 break;
4417
4418         case GAUDI_QUEUE_ID_TPC_5_0:
4419                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4420                 break;
4421
4422         case GAUDI_QUEUE_ID_TPC_5_1:
4423                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4424                 break;
4425
4426         case GAUDI_QUEUE_ID_TPC_5_2:
4427                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4428                 break;
4429
4430         case GAUDI_QUEUE_ID_TPC_5_3:
4431                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4432                 break;
4433
4434         case GAUDI_QUEUE_ID_TPC_6_0:
4435                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4436                 break;
4437
4438         case GAUDI_QUEUE_ID_TPC_6_1:
4439                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4440                 break;
4441
4442         case GAUDI_QUEUE_ID_TPC_6_2:
4443                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4444                 break;
4445
4446         case GAUDI_QUEUE_ID_TPC_6_3:
4447                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4448                 break;
4449
4450         case GAUDI_QUEUE_ID_TPC_7_0:
4451                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4452                 break;
4453
4454         case GAUDI_QUEUE_ID_TPC_7_1:
4455                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4456                 break;
4457
4458         case GAUDI_QUEUE_ID_TPC_7_2:
4459                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4460                 break;
4461
4462         case GAUDI_QUEUE_ID_TPC_7_3:
4463                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4464                 break;
4465
4466         case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4467                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4468                         invalid_queue = true;
4469
4470                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4471                 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4472                 break;
4473
4474         case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4475                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4476                         invalid_queue = true;
4477
4478                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4479                 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4480                 break;
4481
4482         case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4483                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4484                         invalid_queue = true;
4485
4486                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4487                 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4488                 break;
4489
4490         case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4491                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4492                         invalid_queue = true;
4493
4494                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4495                 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4496                 break;
4497
4498         case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4499                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4500                         invalid_queue = true;
4501
4502                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4503                 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4504                 break;
4505
4506         case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4507                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4508                         invalid_queue = true;
4509
4510                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4511                 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4512                 break;
4513
4514         case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4515                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4516                         invalid_queue = true;
4517
4518                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4519                 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4520                 break;
4521
4522         case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4523                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4524                         invalid_queue = true;
4525
4526                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4527                 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4528                 break;
4529
4530         case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4531                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4532                         invalid_queue = true;
4533
4534                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4535                 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4536                 break;
4537
4538         case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4539                 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4540                         invalid_queue = true;
4541
4542                 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4543                 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4544                 break;
4545
4546         default:
4547                 invalid_queue = true;
4548         }
4549
4550         if (invalid_queue) {
4551                 /* Should never get here */
4552                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4553                         hw_queue_id);
4554                 return;
4555         }
4556
4557         db_value = pi;
4558
4559         /* ring the doorbell */
4560         WREG32(db_reg_offset, db_value);
4561
4562         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4563                 /* make sure device CPU will read latest data from host */
4564                 mb();
4565
4566                 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4567                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4568                                 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4569
4570                 WREG32(irq_handler_offset,
4571                         gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4572         }
4573 }
4574
4575 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4576                                 struct hl_bd *bd)
4577 {
4578         __le64 *pbd = (__le64 *) bd;
4579
4580         /* The QMANs are in host memory so a simple copy suffices */
4581         pqe[0] = pbd[0];
4582         pqe[1] = pbd[1];
4583 }
4584
4585 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4586                                         dma_addr_t *dma_handle, gfp_t flags)
4587 {
4588         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4589                                                 dma_handle, flags);
4590
4591         /* Shift to the device's base physical address of host memory */
4592         if (kernel_addr)
4593                 *dma_handle += HOST_PHYS_BASE;
4594
4595         return kernel_addr;
4596 }
4597
4598 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4599                 void *cpu_addr, dma_addr_t dma_handle)
4600 {
4601         /* Cancel the device's base physical address of host memory */
4602         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4603
4604         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4605 }
4606
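/*
 * Scrub the whole HBM by programming every DMA core in MEM_SET mode, giving
 * each core a chunk of up to 2GB, then polling all cores until they are no
 * longer busy before moving on to the next batch of chunks.
 */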
4607 static int gaudi_hbm_scrubbing(struct hl_device *hdev)
4608 {
4609         struct asic_fixed_properties *prop = &hdev->asic_prop;
4610         u64  cur_addr = DRAM_BASE_ADDR_USER;
4611         u32 val;
4612         u32 chunk_size;
4613         int rc, dma_id;
4614
4615         while (cur_addr < prop->dram_end_address) {
4616                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4617                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4618
4619                         chunk_size =
4620                         min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4621
4622                         dev_dbg(hdev->dev,
4623                                 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4624                                 cur_addr, cur_addr + chunk_size);
4625
4626                         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
4627                         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
4628                         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4629                                                 lower_32_bits(cur_addr));
4630                         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4631                                                 upper_32_bits(cur_addr));
4632                         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4633                                         chunk_size);
4634                         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4635                                         ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4636                                         (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4637
4638                         cur_addr += chunk_size;
4639
4640                         if (cur_addr == prop->dram_end_address)
4641                                 break;
4642                 }
4643
4644                 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4645                         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4646
4647                         rc = hl_poll_timeout(
4648                                 hdev,
4649                                 mmDMA0_CORE_STS0 + dma_offset,
4650                                 val,
4651                                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
4652                                 1000,
4653                                 HBM_SCRUBBING_TIMEOUT_US);
4654
4655                         if (rc) {
4656                                 dev_err(hdev->dev,
4657                                         "DMA Timeout during HBM scrubbing of DMA #%d\n",
4658                                         dma_id);
4659                                 return -EIO;
4660                         }
4661                 }
4662         }
4663
4664         return 0;
4665 }
4666
4667 static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
4668 {
4669         struct asic_fixed_properties *prop = &hdev->asic_prop;
4670         struct gaudi_device *gaudi = hdev->asic_specific;
4671         int rc = 0;
4672         u64 val = 0;
4673
4674         if (!hdev->memory_scrub)
4675                 return 0;
4676
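        /*
         * addr == 0 and size == 0 means "scrub everything": wait for the
         * device to become idle, fill the user SRAM with a fixed pattern and
         * then scrub the HBM using the DMA engines.
         */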
4677         if (!addr && !size) {
4678                 /* Wait till device is idle */
4679                 rc = hl_poll_timeout(
4680                                 hdev,
4681                                 mmDMA0_CORE_STS0/* dummy */,
4682                                 val/* dummy */,
4683                                 (hdev->asic_funcs->is_device_idle(hdev, NULL,
4684                                                 0, NULL)),
4685                                                 1000,
4686                                                 HBM_SCRUBBING_TIMEOUT_US);
4687                 if (rc) {
4688                         dev_err(hdev->dev, "waiting for idle timeout\n");
4689                         return -EIO;
4690                 }
4691
4692                 /* Scrub SRAM */
4693                 addr = prop->sram_user_base_address;
4694                 size = hdev->pldm ? 0x10000 :
4695                                 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4696                 val = 0x7777777777777777ull;
4697
4698                 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4699                 if (rc) {
4700                         dev_err(hdev->dev,
4701                                 "Failed to clear SRAM in mem scrub all\n");
4702                         return rc;
4703                 }
4704
4705                 mutex_lock(&gaudi->clk_gate_mutex);
4706                 hdev->asic_funcs->disable_clock_gating(hdev);
4707
4708                 /* Scrub HBM using all DMA channels in parallel */
4709                 rc = gaudi_hbm_scrubbing(hdev);
4710                 if (rc)
4711                         dev_err(hdev->dev,
4712                                 "Failed to clear HBM in mem scrub all\n");
4713
4714                 hdev->asic_funcs->set_clock_gating(hdev);
4715                 mutex_unlock(&gaudi->clk_gate_mutex);
4716         }
4717
4718         return rc;
4719 }
4720
4721 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4722                                 u32 queue_id, dma_addr_t *dma_handle,
4723                                 u16 *queue_len)
4724 {
4725         struct gaudi_device *gaudi = hdev->asic_specific;
4726         struct gaudi_internal_qman_info *q;
4727
4728         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4729                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4730                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4731                 return NULL;
4732         }
4733
4734         q = &gaudi->internal_qmans[queue_id];
4735         *dma_handle = q->pq_dma_addr;
4736         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4737
4738         return q->pq_kernel_addr;
4739 }
4740
4741 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4742                                 u16 len, u32 timeout, u64 *result)
4743 {
4744         struct gaudi_device *gaudi = hdev->asic_specific;
4745
4746         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4747                 if (result)
4748                         *result = 0;
4749                 return 0;
4750         }
4751
4752         if (!timeout)
4753                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4754
4755         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4756                                                 timeout, result);
4757 }
4758
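/*
 * Sanity-test an external H/W queue: allocate a scratch fence buffer and a
 * MSG_PROT packet that writes a known value to it, submit the packet on the
 * queue and poll the fence buffer until the value shows up or we time out.
 */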
4759 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4760 {
4761         struct packet_msg_prot *fence_pkt;
4762         dma_addr_t pkt_dma_addr;
4763         u32 fence_val, tmp, timeout_usec;
4764         dma_addr_t fence_dma_addr;
4765         u32 *fence_ptr;
4766         int rc;
4767
4768         if (hdev->pldm)
4769                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4770         else
4771                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4772
4773         fence_val = GAUDI_QMAN0_FENCE_VAL;
4774
4775         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4776                                                         &fence_dma_addr);
4777         if (!fence_ptr) {
4778                 dev_err(hdev->dev,
4779                         "Failed to allocate memory for H/W queue %d testing\n",
4780                         hw_queue_id);
4781                 return -ENOMEM;
4782         }
4783
4784         *fence_ptr = 0;
4785
4786         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
4787                                         sizeof(struct packet_msg_prot),
4788                                         GFP_KERNEL, &pkt_dma_addr);
4789         if (!fence_pkt) {
4790                 dev_err(hdev->dev,
4791                         "Failed to allocate packet for H/W queue %d testing\n",
4792                         hw_queue_id);
4793                 rc = -ENOMEM;
4794                 goto free_fence_ptr;
4795         }
4796
4797         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4798         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4799         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4800
4801         fence_pkt->ctl = cpu_to_le32(tmp);
4802         fence_pkt->value = cpu_to_le32(fence_val);
4803         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4804
4805         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4806                                         sizeof(struct packet_msg_prot),
4807                                         pkt_dma_addr);
4808         if (rc) {
4809                 dev_err(hdev->dev,
4810                         "Failed to send fence packet to H/W queue %d\n",
4811                         hw_queue_id);
4812                 goto free_pkt;
4813         }
4814
4815         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4816                                         1000, timeout_usec, true);
4817
4818         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4819
4820         if (rc == -ETIMEDOUT) {
4821                 dev_err(hdev->dev,
4822                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4823                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4824                 rc = -EIO;
4825         }
4826
4827 free_pkt:
4828         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
4829                                         pkt_dma_addr);
4830 free_fence_ptr:
4831         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4832                                         fence_dma_addr);
4833         return rc;
4834 }
4835
4836 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4837 {
4838         struct gaudi_device *gaudi = hdev->asic_specific;
4839
4840         /*
4841          * Check the capability here because send_cpu_message() won't update
4842          * the result value if the CPU queue capability isn't set.
4843          */
4844         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4845                 return 0;
4846
4847         return hl_fw_test_cpu_queue(hdev);
4848 }
4849
4850 static int gaudi_test_queues(struct hl_device *hdev)
4851 {
4852         int i, rc, ret_val = 0;
4853
4854         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4855                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4856                         rc = gaudi_test_queue(hdev, i);
4857                         if (rc)
4858                                 ret_val = -EINVAL;
4859                 }
4860         }
4861
4862         rc = gaudi_test_cpu_queue(hdev);
4863         if (rc)
4864                 ret_val = -EINVAL;
4865
4866         return ret_val;
4867 }
4868
4869 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4870                 gfp_t mem_flags, dma_addr_t *dma_handle)
4871 {
4872         void *kernel_addr;
4873
4874         if (size > GAUDI_DMA_POOL_BLK_SIZE)
4875                 return NULL;
4876
4877         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4878
4879         /* Shift to the device's base physical address of host memory */
4880         if (kernel_addr)
4881                 *dma_handle += HOST_PHYS_BASE;
4882
4883         return kernel_addr;
4884 }
4885
4886 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4887                         dma_addr_t dma_addr)
4888 {
4889         /* Cancel the device's base physical address of host memory */
4890         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4891
4892         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4893 }
4894
4895 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4896                                         size_t size, dma_addr_t *dma_handle)
4897 {
4898         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4899 }
4900
4901 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4902                                                 size_t size, void *vaddr)
4903 {
4904         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4905 }
4906
4907 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
4908                         int nents, enum dma_data_direction dir)
4909 {
4910         struct scatterlist *sg;
4911         int i;
4912
4913         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
4914                 return -ENOMEM;
4915
4916         /* Shift to the device's base physical address of host memory */
4917         for_each_sg(sgl, sg, nents, i)
4918                 sg->dma_address += HOST_PHYS_BASE;
4919
4920         return 0;
4921 }
4922
4923 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
4924                         int nents, enum dma_data_direction dir)
4925 {
4926         struct scatterlist *sg;
4927         int i;
4928
4929         /* Cancel the device's base physical address of host memory */
4930         for_each_sg(sgl, sg, nents, i)
4931                 sg->dma_address -= HOST_PHYS_BASE;
4932
4933         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
4934 }
4935
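/*
 * Return the number of bytes of LIN_DMA packets needed to cover the given
 * SG table: physically contiguous entries are merged as long as the combined
 * length does not exceed DMA_MAX_TRANSFER_SIZE, and each resulting run costs
 * one descriptor.
 */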
4936 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
4937                                         struct sg_table *sgt)
4938 {
4939         struct scatterlist *sg, *sg_next_iter;
4940         u32 count, dma_desc_cnt;
4941         u64 len, len_next;
4942         dma_addr_t addr, addr_next;
4943
4944         dma_desc_cnt = 0;
4945
4946         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
4947
4948                 len = sg_dma_len(sg);
4949                 addr = sg_dma_address(sg);
4950
4951                 if (len == 0)
4952                         break;
4953
4954                 while ((count + 1) < sgt->nents) {
4955                         sg_next_iter = sg_next(sg);
4956                         len_next = sg_dma_len(sg_next_iter);
4957                         addr_next = sg_dma_address(sg_next_iter);
4958
4959                         if (len_next == 0)
4960                                 break;
4961
4962                         if ((addr + len == addr_next) &&
4963                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4964                                 len += len_next;
4965                                 count++;
4966                                 sg = sg_next_iter;
4967                         } else {
4968                                 break;
4969                         }
4970                 }
4971
4972                 dma_desc_cnt++;
4973         }
4974
4975         return dma_desc_cnt * sizeof(struct packet_lin_dma);
4976 }
4977
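/*
 * Pin the host buffer referenced by a user LIN_DMA packet (unless it is
 * already pinned for this job), DMA-map it, and add the size of the
 * descriptors it will need to the patched CB size.
 */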
4978 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4979                                 struct hl_cs_parser *parser,
4980                                 struct packet_lin_dma *user_dma_pkt,
4981                                 u64 addr, enum dma_data_direction dir)
4982 {
4983         struct hl_userptr *userptr;
4984         int rc;
4985
4986         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4987                         parser->job_userptr_list, &userptr))
4988                 goto already_pinned;
4989
4990         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4991         if (!userptr)
4992                 return -ENOMEM;
4993
4994         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4995                                 userptr);
4996         if (rc)
4997                 goto free_userptr;
4998
4999         list_add_tail(&userptr->job_node, parser->job_userptr_list);
5000
5001         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
5002                                         userptr->sgt->nents, dir);
5003         if (rc) {
5004                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
5005                 goto unpin_memory;
5006         }
5007
5008         userptr->dma_mapped = true;
5009         userptr->dir = dir;
5010
5011 already_pinned:
5012         parser->patched_cb_size +=
5013                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
5014
5015         return 0;
5016
5017 unpin_memory:
5018         list_del(&userptr->job_node);
5019         hl_unpin_host_memory(hdev, userptr);
5020 free_userptr:
5021         kfree(userptr);
5022         return rc;
5023 }
5024
5025 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
5026                                 struct hl_cs_parser *parser,
5027                                 struct packet_lin_dma *user_dma_pkt,
5028                                 bool src_in_host)
5029 {
5030         enum dma_data_direction dir;
5031         bool skip_host_mem_pin = false, user_memset;
5032         u64 addr;
5033         int rc = 0;
5034
5035         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
5036                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5037                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5038
5039         if (src_in_host) {
5040                 if (user_memset)
5041                         skip_host_mem_pin = true;
5042
5043                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
5044                 dir = DMA_TO_DEVICE;
5045                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5046         } else {
5047                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
5048                 dir = DMA_FROM_DEVICE;
5049                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5050                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5051                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5052         }
5053
5054         if (skip_host_mem_pin)
5055                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5056         else
5057                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
5058                                                 addr, dir);
5059
5060         return rc;
5061 }
5062
5063 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
5064                                 struct hl_cs_parser *parser,
5065                                 struct packet_lin_dma *user_dma_pkt)
5066 {
5067         bool src_in_host = false;
5068         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
5069                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
5070                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
5071
5072         dev_dbg(hdev->dev, "DMA packet details:\n");
5073         dev_dbg(hdev->dev, "source == 0x%llx\n",
5074                                 le64_to_cpu(user_dma_pkt->src_addr));
5075         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
5076         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
5077
5078         /*
5079          * Special handling for DMA with size 0. Bypass all validations
5080          * because no transactions will be done except for WR_COMP, which
5081          * is not a security issue
5082          */
5083         if (!le32_to_cpu(user_dma_pkt->tsize)) {
5084                 parser->patched_cb_size += sizeof(*user_dma_pkt);
5085                 return 0;
5086         }
5087
5088         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
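        /*
         * Jobs submitted on the first PCI DMA queues (up to DMA_0_3) are
         * treated as host-to-device transfers, i.e. the source resides in
         * host memory; anything else is treated as device-to-host.
         */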
5089                 src_in_host = true;
5090
5091         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5092                                                 src_in_host);
5093 }
5094
5095 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5096                                         struct hl_cs_parser *parser,
5097                                         struct packet_load_and_exe *user_pkt)
5098 {
5099         u32 cfg;
5100
5101         cfg = le32_to_cpu(user_pkt->cfg);
5102
5103         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5104                 dev_err(hdev->dev,
5105                         "User not allowed to use Load and Execute\n");
5106                 return -EPERM;
5107         }
5108
5109         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5110
5111         return 0;
5112 }
5113
5114 static int gaudi_validate_cb(struct hl_device *hdev,
5115                         struct hl_cs_parser *parser, bool is_mmu)
5116 {
5117         u32 cb_parsed_length = 0;
5118         int rc = 0;
5119
5120         parser->patched_cb_size = 0;
5121
5122         /* user_cb_size is more than 0 so the loop will always execute */
5123         while (cb_parsed_length < parser->user_cb_size) {
5124                 enum packet_id pkt_id;
5125                 u16 pkt_size;
5126                 struct gaudi_packet *user_pkt;
5127
5128                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5129
5130                 pkt_id = (enum packet_id) (
5131                                 (le64_to_cpu(user_pkt->header) &
5132                                 PACKET_HEADER_PACKET_ID_MASK) >>
5133                                         PACKET_HEADER_PACKET_ID_SHIFT);
5134
5135                 if (!validate_packet_id(pkt_id)) {
5136                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5137                         rc = -EINVAL;
5138                         break;
5139                 }
5140
5141                 pkt_size = gaudi_packet_sizes[pkt_id];
5142                 cb_parsed_length += pkt_size;
5143                 if (cb_parsed_length > parser->user_cb_size) {
5144                         dev_err(hdev->dev,
5145                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5146                         rc = -EINVAL;
5147                         break;
5148                 }
5149
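                /*
                 * Packets that could compromise the device (MSG_PROT, CP_DMA,
                 * STOP, WREG_BULK) are rejected outright. LOAD_AND_EXE and
                 * LIN_DMA get extra validation; the remaining packet types
                 * only contribute their size to the patched CB.
                 */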
5150                 switch (pkt_id) {
5151                 case PACKET_MSG_PROT:
5152                         dev_err(hdev->dev,
5153                                 "User not allowed to use MSG_PROT\n");
5154                         rc = -EPERM;
5155                         break;
5156
5157                 case PACKET_CP_DMA:
5158                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5159                         rc = -EPERM;
5160                         break;
5161
5162                 case PACKET_STOP:
5163                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5164                         rc = -EPERM;
5165                         break;
5166
5167                 case PACKET_WREG_BULK:
5168                         dev_err(hdev->dev,
5169                                 "User not allowed to use WREG_BULK\n");
5170                         rc = -EPERM;
5171                         break;
5172
5173                 case PACKET_LOAD_AND_EXE:
5174                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5175                                 (struct packet_load_and_exe *) user_pkt);
5176                         break;
5177
5178                 case PACKET_LIN_DMA:
5179                         parser->contains_dma_pkt = true;
5180                         if (is_mmu)
5181                                 parser->patched_cb_size += pkt_size;
5182                         else
5183                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5184                                         (struct packet_lin_dma *) user_pkt);
5185                         break;
5186
5187                 case PACKET_WREG_32:
5188                 case PACKET_MSG_LONG:
5189                 case PACKET_MSG_SHORT:
5190                 case PACKET_REPEAT:
5191                 case PACKET_FENCE:
5192                 case PACKET_NOP:
5193                 case PACKET_ARB_POINT:
5194                         parser->patched_cb_size += pkt_size;
5195                         break;
5196
5197                 default:
5198                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5199                                 pkt_id);
5200                         rc = -EINVAL;
5201                         break;
5202                 }
5203
5204                 if (rc)
5205                         break;
5206         }
5207
5208         /*
5209          * The new CB should have space at the end for two MSG_PROT packets:
5210          * 1. A packet that will act as a completion packet
5211          * 2. A packet that will generate MSI-X interrupt
5212          */
5213         if (parser->completion)
5214                 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
5215
5216         return rc;
5217 }
5218
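/*
 * Expand one user LIN_DMA packet into a LIN_DMA packet per (merged) DMA
 * scatter-gather entry of the pinned host memory. DMA-contiguous SG entries
 * are coalesced up to DMA_MAX_TRANSFER_SIZE, the engine-barrier bit is kept
 * only on the first generated packet and the user's WR_COMP setting is
 * restored on the last one. A host-to-device memset needs no host pages and
 * is copied through unchanged.
 */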
5219 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5220                                 struct hl_cs_parser *parser,
5221                                 struct packet_lin_dma *user_dma_pkt,
5222                                 struct packet_lin_dma *new_dma_pkt,
5223                                 u32 *new_dma_pkt_size)
5224 {
5225         struct hl_userptr *userptr;
5226         struct scatterlist *sg, *sg_next_iter;
5227         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5228         u64 len, len_next;
5229         dma_addr_t dma_addr, dma_addr_next;
5230         u64 device_memory_addr, addr;
5231         enum dma_data_direction dir;
5232         struct sg_table *sgt;
5233         bool src_in_host = false;
5234         bool skip_host_mem_pin = false;
5235         bool user_memset;
5236
5237         ctl = le32_to_cpu(user_dma_pkt->ctl);
5238
5239         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5240                 src_in_host = true;
5241
5242         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5243                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5244
5245         if (src_in_host) {
5246                 addr = le64_to_cpu(user_dma_pkt->src_addr);
5247                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5248                 dir = DMA_TO_DEVICE;
5249                 if (user_memset)
5250                         skip_host_mem_pin = true;
5251         } else {
5252                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5253                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5254                 dir = DMA_FROM_DEVICE;
5255         }
5256
5257         if ((!skip_host_mem_pin) &&
5258                 (!hl_userptr_is_pinned(hdev, addr,
5259                                         le32_to_cpu(user_dma_pkt->tsize),
5260                                         parser->job_userptr_list, &userptr))) {
5261                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5262                                 addr, le32_to_cpu(user_dma_pkt->tsize));
5263                 return -EFAULT;
5264         }
5265
5266         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5267                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5268                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5269                 return 0;
5270         }
5271
5272         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5273
5274         sgt = userptr->sgt;
5275         dma_desc_cnt = 0;
5276
5277         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
5278                 len = sg_dma_len(sg);
5279                 dma_addr = sg_dma_address(sg);
5280
5281                 if (len == 0)
5282                         break;
5283
5284                 while ((count + 1) < sgt->nents) {
5285                         sg_next_iter = sg_next(sg);
5286                         len_next = sg_dma_len(sg_next_iter);
5287                         dma_addr_next = sg_dma_address(sg_next_iter);
5288
5289                         if (len_next == 0)
5290                                 break;
5291
5292                         if ((dma_addr + len == dma_addr_next) &&
5293                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5294                                 len += len_next;
5295                                 count++;
5296                                 sg = sg_next_iter;
5297                         } else {
5298                                 break;
5299                         }
5300                 }
5301
5302                 ctl = le32_to_cpu(user_dma_pkt->ctl);
5303                 if (likely(dma_desc_cnt))
5304                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5305                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5306                 new_dma_pkt->ctl = cpu_to_le32(ctl);
5307                 new_dma_pkt->tsize = cpu_to_le32(len);
5308
5309                 if (dir == DMA_TO_DEVICE) {
5310                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5311                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5312                 } else {
5313                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5314                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5315                 }
5316
5317                 if (!user_memset)
5318                         device_memory_addr += len;
5319                 dma_desc_cnt++;
5320                 new_dma_pkt++;
5321         }
5322
5323         if (!dma_desc_cnt) {
5324                 dev_err(hdev->dev,
5325                         "Error of 0 SG entries when patching DMA packet\n");
5326                 return -EFAULT;
5327         }
5328
5329         /* Fix the last dma packet - wrcomp must be as user set it */
5330         new_dma_pkt--;
5331         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5332
5333         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5334
5335         return 0;
5336 }
5337
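/*
 * Second pass for the no-MMU flow: copy the user CB into the patched CB,
 * expanding every LIN_DMA packet through gaudi_patch_dma_packet() and
 * copying all other allowed packets verbatim.
 */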
5338 static int gaudi_patch_cb(struct hl_device *hdev,
5339                                 struct hl_cs_parser *parser)
5340 {
5341         u32 cb_parsed_length = 0;
5342         u32 cb_patched_cur_length = 0;
5343         int rc = 0;
5344
5345         /* user_cb_size is more than 0 so the loop will always be executed */
5346         while (cb_parsed_length < parser->user_cb_size) {
5347                 enum packet_id pkt_id;
5348                 u16 pkt_size;
5349                 u32 new_pkt_size = 0;
5350                 struct gaudi_packet *user_pkt, *kernel_pkt;
5351
5352                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5353                 kernel_pkt = parser->patched_cb->kernel_address +
5354                                         cb_patched_cur_length;
5355
5356                 pkt_id = (enum packet_id) (
5357                                 (le64_to_cpu(user_pkt->header) &
5358                                 PACKET_HEADER_PACKET_ID_MASK) >>
5359                                         PACKET_HEADER_PACKET_ID_SHIFT);
5360
5361                 if (!validate_packet_id(pkt_id)) {
5362                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5363                         rc = -EINVAL;
5364                         break;
5365                 }
5366
5367                 pkt_size = gaudi_packet_sizes[pkt_id];
5368                 cb_parsed_length += pkt_size;
5369                 if (cb_parsed_length > parser->user_cb_size) {
5370                         dev_err(hdev->dev,
5371                                 "packet 0x%x is out of CB boundary\n", pkt_id);
5372                         rc = -EINVAL;
5373                         break;
5374                 }
5375
5376                 switch (pkt_id) {
5377                 case PACKET_LIN_DMA:
5378                         rc = gaudi_patch_dma_packet(hdev, parser,
5379                                         (struct packet_lin_dma *) user_pkt,
5380                                         (struct packet_lin_dma *) kernel_pkt,
5381                                         &new_pkt_size);
5382                         cb_patched_cur_length += new_pkt_size;
5383                         break;
5384
5385                 case PACKET_MSG_PROT:
5386                         dev_err(hdev->dev,
5387                                 "User not allowed to use MSG_PROT\n");
5388                         rc = -EPERM;
5389                         break;
5390
5391                 case PACKET_CP_DMA:
5392                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5393                         rc = -EPERM;
5394                         break;
5395
5396                 case PACKET_STOP:
5397                         dev_err(hdev->dev, "User not allowed to use STOP\n");
5398                         rc = -EPERM;
5399                         break;
5400
5401                 case PACKET_WREG_32:
5402                 case PACKET_WREG_BULK:
5403                 case PACKET_MSG_LONG:
5404                 case PACKET_MSG_SHORT:
5405                 case PACKET_REPEAT:
5406                 case PACKET_FENCE:
5407                 case PACKET_NOP:
5408                 case PACKET_ARB_POINT:
5409                 case PACKET_LOAD_AND_EXE:
5410                         memcpy(kernel_pkt, user_pkt, pkt_size);
5411                         cb_patched_cur_length += pkt_size;
5412                         break;
5413
5414                 default:
5415                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5416                                 pkt_id);
5417                         rc = -EINVAL;
5418                         break;
5419                 }
5420
5421                 if (rc)
5422                         break;
5423         }
5424
5425         return rc;
5426 }
5427
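/*
 * MMU flow: allocate a patched CB, copy the user CB into it (leaving room
 * for the two trailing MSG_PROT packets when a completion is requested) and
 * run the validation pass on the copy rather than on the user CB itself.
 */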
5428 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5429                 struct hl_cs_parser *parser)
5430 {
5431         u64 patched_cb_handle;
5432         u32 patched_cb_size;
5433         struct hl_cb *user_cb;
5434         int rc;
5435
5436         /*
5437          * The new CB should have space at the end for two MSG_PROT packets:
5438          * 1. A packet that will act as a completion packet
5439          * 2. A packet that will generate MSI interrupt
5440          */
5441         if (parser->completion)
5442                 parser->patched_cb_size = parser->user_cb_size +
5443                                 sizeof(struct packet_msg_prot) * 2;
5444         else
5445                 parser->patched_cb_size = parser->user_cb_size;
5446
5447         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5448                                 parser->patched_cb_size, false, false,
5449                                 &patched_cb_handle);
5450
5451         if (rc) {
5452                 dev_err(hdev->dev,
5453                         "Failed to allocate patched CB for DMA CS %d\n",
5454                         rc);
5455                 return rc;
5456         }
5457
5458         patched_cb_handle >>= PAGE_SHIFT;
5459         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5460                                 (u32) patched_cb_handle);
5461         /* hl_cb_get should never fail */
5462         if (!parser->patched_cb) {
5463                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5464                         (u32) patched_cb_handle);
5465                 rc = -EFAULT;
5466                 goto out;
5467         }
5468
5469         /*
5470          * The check that parser->user_cb_size <= parser->user_cb->size was done
5471          * in validate_queue_index().
5472          */
5473         memcpy(parser->patched_cb->kernel_address,
5474                 parser->user_cb->kernel_address,
5475                 parser->user_cb_size);
5476
5477         patched_cb_size = parser->patched_cb_size;
5478
5479         /* Validate patched CB instead of user CB */
5480         user_cb = parser->user_cb;
5481         parser->user_cb = parser->patched_cb;
5482         rc = gaudi_validate_cb(hdev, parser, true);
5483         parser->user_cb = user_cb;
5484
5485         if (rc) {
5486                 hl_cb_put(parser->patched_cb);
5487                 goto out;
5488         }
5489
5490         if (patched_cb_size != parser->patched_cb_size) {
5491                 dev_err(hdev->dev, "user CB size mismatch\n");
5492                 hl_cb_put(parser->patched_cb);
5493                 rc = -EINVAL;
5494                 goto out;
5495         }
5496
5497 out:
5498         /*
5499          * Always call cb destroy here because we still hold one reference
5500          * to it from the earlier cb_get call. After the job is completed,
5501          * cb_put will release it, but here we want to remove it from the
5502          * idr
5503          */
5504         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5505                                         patched_cb_handle << PAGE_SHIFT);
5506
5507         return rc;
5508 }
5509
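/*
 * No-MMU flow: validate the user CB to compute the patched size, allocate
 * the patched CB and fill it via gaudi_patch_cb(). On failure, the userptr
 * list that was pinned for this job is released here as well.
 */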
5510 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5511                 struct hl_cs_parser *parser)
5512 {
5513         u64 patched_cb_handle;
5514         int rc;
5515
5516         rc = gaudi_validate_cb(hdev, parser, false);
5517
5518         if (rc)
5519                 goto free_userptr;
5520
5521         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
5522                                 parser->patched_cb_size, false, false,
5523                                 &patched_cb_handle);
5524         if (rc) {
5525                 dev_err(hdev->dev,
5526                         "Failed to allocate patched CB for DMA CS %d\n", rc);
5527                 goto free_userptr;
5528         }
5529
5530         patched_cb_handle >>= PAGE_SHIFT;
5531         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
5532                                 (u32) patched_cb_handle);
5533         /* hl_cb_get should never fail here */
5534         if (!parser->patched_cb) {
5535                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
5536                                 (u32) patched_cb_handle);
5537                 rc = -EFAULT;
5538                 goto out;
5539         }
5540
5541         rc = gaudi_patch_cb(hdev, parser);
5542
5543         if (rc)
5544                 hl_cb_put(parser->patched_cb);
5545
5546 out:
5547         /*
5548          * Always call cb destroy here because we still hold one reference
5549          * to it from the earlier cb_get call. After the job is completed,
5550          * cb_put will release it, but here we want to remove it from the
5551          * idr
5552          */
5553         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
5554                                 patched_cb_handle << PAGE_SHIFT);
5555
5556 free_userptr:
5557         if (rc)
5558                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5559         return rc;
5560 }
5561
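/*
 * Jobs for internal queues are not patched. Only verify that the target NIC
 * queue is enabled and that the CB range lies entirely inside SRAM, DRAM or
 * the PMMU user virtual address range.
 */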
5562 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5563                                         struct hl_cs_parser *parser)
5564 {
5565         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5566         struct gaudi_device *gaudi = hdev->asic_specific;
5567         u32 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT +
5568                 ((parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2));
5569
5570         if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5571                         (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3) &&
5572                         (!(gaudi->hw_cap_initialized & nic_mask_q_id))) {
5573                 dev_err(hdev->dev, "h/w queue %d is disabled\n",
5574                                 parser->hw_queue_id);
5575                 return -EINVAL;
5576         }
5577
5578         /* For internal queue jobs just check if CB address is valid */
5579         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5580                                         parser->user_cb_size,
5581                                         asic_prop->sram_user_base_address,
5582                                         asic_prop->sram_end_address))
5583                 return 0;
5584
5585         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5586                                         parser->user_cb_size,
5587                                         asic_prop->dram_user_base_address,
5588                                         asic_prop->dram_end_address))
5589                 return 0;
5590
5591         /* PMMU and HPMMU addresses are equal, check only one of them */
5592         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5593                                         parser->user_cb_size,
5594                                         asic_prop->pmmu.start_addr,
5595                                         asic_prop->pmmu.end_addr))
5596                 return 0;
5597
5598         dev_err(hdev->dev,
5599                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5600                 parser->user_cb, parser->user_cb_size);
5601
5602         return -EFAULT;
5603 }
5604
5605 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5606 {
5607         struct gaudi_device *gaudi = hdev->asic_specific;
5608
5609         if (parser->queue_type == QUEUE_TYPE_INT)
5610                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5611
5612         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5613                 return gaudi_parse_cb_mmu(hdev, parser);
5614         else
5615                 return gaudi_parse_cb_no_mmu(hdev, parser);
5616 }
5617
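/*
 * Append the two trailing MSG_PROT packets to a CB: the first writes cq_val
 * to the completion queue address (optionally with an engine barrier), the
 * second writes 1 to the MSI interrupt register, using vector 0 unless
 * multi-MSI mode is enabled.
 */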
5618 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
5619                                         void *kernel_address, u32 len,
5620                                         u64 cq_addr, u32 cq_val, u32 msi_vec,
5621                                         bool eb)
5622 {
5623         struct gaudi_device *gaudi = hdev->asic_specific;
5624         struct packet_msg_prot *cq_pkt;
5625         u32 tmp;
5626
5627         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5628
5629         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5630         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5631
5632         if (eb)
5633                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5634
5635         cq_pkt->ctl = cpu_to_le32(tmp);
5636         cq_pkt->value = cpu_to_le32(cq_val);
5637         cq_pkt->addr = cpu_to_le64(cq_addr);
5638
5639         cq_pkt++;
5640
5641         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5642         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5643         cq_pkt->ctl = cpu_to_le32(tmp);
5644         cq_pkt->value = cpu_to_le32(1);
5645
5646         if (!gaudi->multi_msi_mode)
5647                 msi_vec = 0;
5648
5649         cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
5650 }
5651
5652 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5653 {
5654         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5655 }
5656
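/*
 * Fill a device memory range with a 64-bit value by building a single
 * memset LIN_DMA packet in a kernel CB and sending it synchronously on
 * QMAN0 (DMA channel 0). DMA0 error causes are cleared before the transfer
 * while the device is still initializing and are checked again afterwards.
 */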
5657 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5658                                         u32 size, u64 val)
5659 {
5660         struct packet_lin_dma *lin_dma_pkt;
5661         struct hl_cs_job *job;
5662         u32 cb_size, ctl, err_cause;
5663         struct hl_cb *cb;
5664         u64 id;
5665         int rc;
5666
5667         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5668         if (!cb)
5669                 return -EFAULT;
5670
5671         lin_dma_pkt = cb->kernel_address;
5672         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5673         cb_size = sizeof(*lin_dma_pkt);
5674
5675         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5676         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5677         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5678         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5679         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5680
5681         lin_dma_pkt->ctl = cpu_to_le32(ctl);
5682         lin_dma_pkt->src_addr = cpu_to_le64(val);
5683         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5684         lin_dma_pkt->tsize = cpu_to_le32(size);
5685
5686         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5687         if (!job) {
5688                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5689                 rc = -ENOMEM;
5690                 goto release_cb;
5691         }
5692
5693         /* Verify DMA is OK */
5694         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5695         if (err_cause && !hdev->init_done) {
5696                 dev_dbg(hdev->dev,
5697                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
5698                         err_cause);
5699                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5700         }
5701
5702         job->id = 0;
5703         job->user_cb = cb;
5704         atomic_inc(&job->user_cb->cs_cnt);
5705         job->user_cb_size = cb_size;
5706         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5707         job->patched_cb = job->user_cb;
5708         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5709
5710         hl_debugfs_add_job(hdev, job);
5711
5712         rc = gaudi_send_job_on_qman0(hdev, job);
5713         hl_debugfs_remove_job(hdev, job);
5714         kfree(job);
5715         atomic_dec(&cb->cs_cnt);
5716
5717         /* Verify DMA is OK */
5718         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5719         if (err_cause) {
5720                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5721                 rc = -EIO;
5722                 if (!hdev->init_done) {
5723                         dev_dbg(hdev->dev,
5724                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5725                                 err_cause);
5726                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5727                 }
5728         }
5729
5730 release_cb:
5731         id = cb->id;
5732         hl_cb_put(cb);
5733         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
5734
5735         return rc;
5736 }
5737
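/*
 * Write the same value to num_regs consecutive 32-bit registers by emitting
 * one MSG_LONG packet per register in a kernel CB and sending it
 * synchronously on QMAN0.
 */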
5738 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5739                                         u32 num_regs, u32 val)
5740 {
5741         struct packet_msg_long *pkt;
5742         struct hl_cs_job *job;
5743         u32 cb_size, ctl;
5744         struct hl_cb *cb;
5745         int i, rc;
5746
5747         cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5748
5749         if (cb_size > SZ_2M) {
5750                 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M >> 20);
5751                 return -ENOMEM;
5752         }
5753
5754         cb = hl_cb_kernel_create(hdev, cb_size, false);
5755         if (!cb)
5756                 return -EFAULT;
5757
5758         pkt = cb->kernel_address;
5759
5760         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5761         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5762         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5763         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5764         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5765
5766         for (i = 0; i < num_regs ; i++, pkt++) {
5767                 pkt->ctl = cpu_to_le32(ctl);
5768                 pkt->value = cpu_to_le32(val);
5769                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5770         }
5771
5772         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5773         if (!job) {
5774                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5775                 rc = -ENOMEM;
5776                 goto release_cb;
5777         }
5778
5779         job->id = 0;
5780         job->user_cb = cb;
5781         atomic_inc(&job->user_cb->cs_cnt);
5782         job->user_cb_size = cb_size;
5783         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5784         job->patched_cb = job->user_cb;
5785         job->job_cb_size = cb_size;
5786
5787         hl_debugfs_add_job(hdev, job);
5788
5789         rc = gaudi_send_job_on_qman0(hdev, job);
5790         hl_debugfs_remove_job(hdev, job);
5791         kfree(job);
5792         atomic_dec(&cb->cs_cnt);
5793
5794 release_cb:
5795         hl_cb_put(cb);
5796         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5797
5798         return rc;
5799 }
5800
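/*
 * Like gaudi_memset_registers(), but when a live compute context exists the
 * MSG_LONG packets are placed on a pending CB that will be submitted on the
 * given external hardware queue on behalf of that context, instead of being
 * executed immediately on QMAN0.
 */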
5801 static int gaudi_schedule_register_memset(struct hl_device *hdev,
5802                 u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
5803 {
5804         struct hl_ctx *ctx;
5805         struct hl_pending_cb *pending_cb;
5806         struct packet_msg_long *pkt;
5807         u32 cb_size, ctl;
5808         struct hl_cb *cb;
5809         int i, rc;
5810
5811         mutex_lock(&hdev->fpriv_list_lock);
5812         ctx = hdev->compute_ctx;
5813
5814         /* If no compute context is available or the context is going down,
5815          * memset the registers directly
5816          */
5817         if (!ctx || kref_read(&ctx->refcount) == 0) {
5818                 rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
5819                 mutex_unlock(&hdev->fpriv_list_lock);
5820                 return rc;
5821         }
5822
5823         mutex_unlock(&hdev->fpriv_list_lock);
5824
5825         cb_size = (sizeof(*pkt) * num_regs) +
5826                         sizeof(struct packet_msg_prot) * 2;
5827
5828         if (cb_size > SZ_2M) {
5829                 dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M >> 20);
5830                 return -ENOMEM;
5831         }
5832
5833         pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
5834         if (!pending_cb)
5835                 return -ENOMEM;
5836
5837         cb = hl_cb_kernel_create(hdev, cb_size, false);
5838         if (!cb) {
5839                 kfree(pending_cb);
5840                 return -EFAULT;
5841         }
5842
5843         pkt = cb->kernel_address;
5844
5845         ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5846         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5847         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5848         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5849         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5850
5851         for (i = 0; i < num_regs ; i++, pkt++) {
5852                 pkt->ctl = cpu_to_le32(ctl);
5853                 pkt->value = cpu_to_le32(val);
5854                 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5855         }
5856
5857         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5858
5859         pending_cb->cb = cb;
5860         pending_cb->cb_size = cb_size;
5861         /* The queue ID MUST be an external queue ID. Otherwise, we will
5862          * have undefined behavior
5863          */
5864         pending_cb->hw_queue_id = hw_queue_id;
5865
5866         spin_lock(&ctx->pending_cb_lock);
5867         list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
5868         spin_unlock(&ctx->pending_cb_lock);
5869
5870         return 0;
5871 }
5872
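/*
 * Clear the user-visible sync manager objects: all SOBs and monitor status
 * registers of the E_N, E_S and W_N blocks, and the W_S block except for
 * the SOBs and monitors reserved for the driver at its start.
 */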
5873 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5874 {
5875         u64 base_addr;
5876         u32 num_regs;
5877         int rc;
5878
5879         base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5880         num_regs = NUM_OF_SOB_IN_BLOCK;
5881         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5882         if (rc) {
5883                 dev_err(hdev->dev, "failed resetting SM registers\n");
5884                 return -ENOMEM;
5885         }
5886
5887         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5888         num_regs = NUM_OF_SOB_IN_BLOCK;
5889         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5890         if (rc) {
5891                 dev_err(hdev->dev, "failed resetting SM registers\n");
5892                 return -ENOMEM;
5893         }
5894
5895         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5896         num_regs = NUM_OF_SOB_IN_BLOCK;
5897         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5898         if (rc) {
5899                 dev_err(hdev->dev, "failed resetting SM registers\n");
5900                 return -ENOMEM;
5901         }
5902
5903         base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5904         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5905         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5906         if (rc) {
5907                 dev_err(hdev->dev, "failed resetting SM registers\n");
5908                 return -ENOMEM;
5909         }
5910
5911         base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5912         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5913         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5914         if (rc) {
5915                 dev_err(hdev->dev, "failed resetting SM registers\n");
5916                 return -ENOMEM;
5917         }
5918
5919         base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5920         num_regs = NUM_OF_MONITORS_IN_BLOCK;
5921         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5922         if (rc) {
5923                 dev_err(hdev->dev, "failed resetting SM registers\n");
5924                 return -ENOMEM;
5925         }
5926
5927         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5928                         (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5929         num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5930         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5931         if (rc) {
5932                 dev_err(hdev->dev, "failed resetting SM registers\n");
5933                 return -ENOMEM;
5934         }
5935
5936         base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5937                         (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5938         num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5939         rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5940         if (rc) {
5941                 dev_err(hdev->dev, "failed resetting SM registers\n");
5942                 return -ENOMEM;
5943         }
5944
5945         return 0;
5946 }
5947
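/*
 * Restore the per-channel DMA core completion settings so that WR_COMP
 * points at the channel's own SOB, and for the compute DMAs (2-7) restore
 * WR_AWUSER_31_11, which the user may have changed for SRAM reduction.
 */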
5948 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5949 {
5950         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5951                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5952         int i;
5953
5954         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5955                 u64 sob_addr = CFG_BASE +
5956                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5957                                 (i * sob_delta);
5958                 u32 dma_offset = i * DMA_CORE_OFFSET;
5959
5960                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5961                                 lower_32_bits(sob_addr));
5962                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5963                                 upper_32_bits(sob_addr));
5964                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5965
5966                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5967                  * modified by the user for SRAM reduction
5968                  */
5969                 if (i > 1)
5970                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5971                                                                 0x00000001);
5972         }
5973 }
5974
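/*
 * Reset the QMAN arbitration configuration of every DMA, MME, TPC and NIC
 * queue back to its default value (0).
 */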
5975 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5976 {
5977         u32 qman_offset;
5978         int i;
5979
5980         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5981                 qman_offset = i * DMA_QMAN_OFFSET;
5982                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5983         }
5984
5985         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5986                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5987                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5988         }
5989
5990         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5991                 qman_offset = i * TPC_QMAN_OFFSET;
5992                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5993         }
5994
5995         for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5996                 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5997                                 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5998                 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5999         }
6000 }
6001
6002 static int gaudi_restore_user_registers(struct hl_device *hdev)
6003 {
6004         int rc;
6005
6006         rc = gaudi_restore_sm_registers(hdev);
6007         if (rc)
6008                 return rc;
6009
6010         gaudi_restore_dma_registers(hdev);
6011         gaudi_restore_qm_registers(hdev);
6012
6013         return 0;
6014 }
6015
6016 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
6017 {
6018         return gaudi_restore_user_registers(hdev);
6019 }
6020
6021 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
6022 {
6023         struct asic_fixed_properties *prop = &hdev->asic_prop;
6024         struct gaudi_device *gaudi = hdev->asic_specific;
6025         u64 addr = prop->mmu_pgt_addr;
6026         u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
6027
6028         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6029                 return 0;
6030
6031         return gaudi_memset_device_memory(hdev, addr, size, 0);
6032 }
6033
6034 static void gaudi_restore_phase_topology(struct hl_device *hdev)
6035 {
6036
6037 }
6038
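/*
 * debugfs access helpers: route a raw address to the matching region.
 * Configuration space goes through RREG32/WREG32 (refused while clock
 * gating covers the debugfs mask), SRAM through its PCI BAR, HBM by
 * temporarily moving the HBM BAR, and host physical memory is touched
 * directly only for user addresses when no IOMMU is present.
 */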
6039 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr,
6040                         bool user_address, u32 *val)
6041 {
6042         struct asic_fixed_properties *prop = &hdev->asic_prop;
6043         struct gaudi_device *gaudi = hdev->asic_specific;
6044         u64 hbm_bar_addr, host_phys_end;
6045         int rc = 0;
6046
6047         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6048
6049         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6050
6051                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6052                                 (hdev->clock_gating_mask &
6053                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6054
6055                         dev_err_ratelimited(hdev->dev,
6056                                 "Can't read register - clock gating is enabled!\n");
6057                         rc = -EFAULT;
6058                 } else {
6059                         *val = RREG32(addr - CFG_BASE);
6060                 }
6061
6062         } else if ((addr >= SRAM_BASE_ADDR) &&
6063                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6064                 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
6065                                 (addr - SRAM_BASE_ADDR));
6066         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6067                 u64 bar_base_addr = DRAM_PHYS_BASE +
6068                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6069
6070                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6071                 if (hbm_bar_addr != U64_MAX) {
6072                         *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
6073                                                 (addr - bar_base_addr));
6074
6075                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6076                                                 hbm_bar_addr);
6077                 }
6078                 if (hbm_bar_addr == U64_MAX)
6079                         rc = -EIO;
6080         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6081                         user_address && !iommu_present(&pci_bus_type)) {
6082                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
6083         } else {
6084                 rc = -EFAULT;
6085         }
6086
6087         return rc;
6088 }
6089
6090 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr,
6091                         bool user_address, u32 val)
6092 {
6093         struct asic_fixed_properties *prop = &hdev->asic_prop;
6094         struct gaudi_device *gaudi = hdev->asic_specific;
6095         u64 hbm_bar_addr, host_phys_end;
6096         int rc = 0;
6097
6098         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6099
6100         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
6101
6102                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6103                                 (hdev->clock_gating_mask &
6104                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6105
6106                         dev_err_ratelimited(hdev->dev,
6107                                 "Can't write register - clock gating is enabled!\n");
6108                         rc = -EFAULT;
6109                 } else {
6110                         WREG32(addr - CFG_BASE, val);
6111                 }
6112
6113         } else if ((addr >= SRAM_BASE_ADDR) &&
6114                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
6115                 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
6116                                         (addr - SRAM_BASE_ADDR));
6117         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
6118                 u64 bar_base_addr = DRAM_PHYS_BASE +
6119                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6120
6121                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6122                 if (hbm_bar_addr != U64_MAX) {
6123                         writel(val, hdev->pcie_bar[HBM_BAR_ID] +
6124                                                 (addr - bar_base_addr));
6125
6126                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6127                                                 hbm_bar_addr);
6128                 }
6129                 if (hbm_bar_addr == U64_MAX)
6130                         rc = -EIO;
6131         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6132                         user_address && !iommu_present(&pci_bus_type)) {
6133                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6134         } else {
6135                 rc = -EFAULT;
6136         }
6137
6138         return rc;
6139 }
6140
6141 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr,
6142                                 bool user_address, u64 *val)
6143 {
6144         struct asic_fixed_properties *prop = &hdev->asic_prop;
6145         struct gaudi_device *gaudi = hdev->asic_specific;
6146         u64 hbm_bar_addr, host_phys_end;
6147         int rc = 0;
6148
6149         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6150
6151         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6152
6153                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6154                                 (hdev->clock_gating_mask &
6155                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6156
6157                         dev_err_ratelimited(hdev->dev,
6158                                 "Can't read register - clock gating is enabled!\n");
6159                         rc = -EFAULT;
6160                 } else {
6161                         u32 val_l = RREG32(addr - CFG_BASE);
6162                         u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
6163
6164                         *val = (((u64) val_h) << 32) | val_l;
6165                 }
6166
6167         } else if ((addr >= SRAM_BASE_ADDR) &&
6168                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6169                 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
6170                                 (addr - SRAM_BASE_ADDR));
6171         } else if (addr <=
6172                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6173                 u64 bar_base_addr = DRAM_PHYS_BASE +
6174                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6175
6176                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6177                 if (hbm_bar_addr != U64_MAX) {
6178                         *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
6179                                                 (addr - bar_base_addr));
6180
6181                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6182                                                 hbm_bar_addr);
6183                 }
6184                 if (hbm_bar_addr == U64_MAX)
6185                         rc = -EIO;
6186         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6187                         user_address && !iommu_present(&pci_bus_type)) {
6188                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
6189         } else {
6190                 rc = -EFAULT;
6191         }
6192
6193         return rc;
6194 }
6195
6196 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr,
6197                                 bool user_address, u64 val)
6198 {
6199         struct asic_fixed_properties *prop = &hdev->asic_prop;
6200         struct gaudi_device *gaudi = hdev->asic_specific;
6201         u64 hbm_bar_addr, host_phys_end;
6202         int rc = 0;
6203
6204         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
6205
6206         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
6207
6208                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
6209                                 (hdev->clock_gating_mask &
6210                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
6211
6212                         dev_err_ratelimited(hdev->dev,
6213                                 "Can't write register - clock gating is enabled!\n");
6214                         rc = -EFAULT;
6215                 } else {
6216                         WREG32(addr - CFG_BASE, lower_32_bits(val));
6217                         WREG32(addr + sizeof(u32) - CFG_BASE,
6218                                 upper_32_bits(val));
6219                 }
6220
6221         } else if ((addr >= SRAM_BASE_ADDR) &&
6222                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
6223                 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
6224                                         (addr - SRAM_BASE_ADDR));
6225         } else if (addr <=
6226                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
6227                 u64 bar_base_addr = DRAM_PHYS_BASE +
6228                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
6229
6230                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
6231                 if (hbm_bar_addr != U64_MAX) {
6232                         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6233                                                 (addr - bar_base_addr));
6234
6235                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
6236                                                 hbm_bar_addr);
6237                 }
6238                 if (hbm_bar_addr == U64_MAX)
6239                         rc = -EIO;
6240         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
6241                         user_address && !iommu_present(&pci_bus_type)) {
6242                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
6243         } else {
6244                 rc = -EFAULT;
6245         }
6246
6247         return rc;
6248 }
6249
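/*
 * Program one DMA core channel for a single linear device-to-host transfer,
 * busy-wait up to one second for the channel to become idle and then check
 * its error-cause register.
 */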
6250 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
6251                                         u32 size_to_dma, dma_addr_t dma_addr)
6252 {
6253         u32 err_cause, val;
6254         u64 dma_offset;
6255         int rc;
6256
6257         dma_offset = dma_id * DMA_CORE_OFFSET;
6258
6259         WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
6260         WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
6261         WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
6262         WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
6263         WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
6264         WREG32(mmDMA0_CORE_COMMIT + dma_offset,
6265                         (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
6266
6267         rc = hl_poll_timeout(
6268                 hdev,
6269                 mmDMA0_CORE_STS0 + dma_offset,
6270                 val,
6271                 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
6272                 0,
6273                 1000000);
6274
6275         if (rc) {
6276                 dev_err(hdev->dev,
6277                         "DMA %d timed-out during reading of 0x%llx\n",
6278                         dma_id, addr);
6279                 return -EIO;
6280         }
6281
6282         /* Verify DMA is OK */
6283         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6284         if (err_cause) {
6285                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
6286                 dev_dbg(hdev->dev,
6287                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6288                         err_cause);
6289                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6290
6291                 return -EIO;
6292         }
6293
6294         return 0;
6295 }
6296
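/*
 * Read an arbitrary device address range for debugfs using an idle PCI DMA
 * channel. The transfer is done in 2MB chunks into a temporary coherent
 * buffer, with the channel's QMAN CPs stopped and its PROT bit raised so
 * the core can reach the unmapped host buffer (see the TODOs below).
 */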
6297 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
6298                                 void *blob_addr)
6299 {
6300         u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
6301         struct gaudi_device *gaudi = hdev->asic_specific;
6302         u64 dma_offset, qm_offset;
6303         dma_addr_t dma_addr;
6304         void *kernel_addr;
6305         bool is_eng_idle;
6306         int rc = 0, dma_id;
6307
6308         kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
6309                                                 hdev, SZ_2M,
6310                                                 &dma_addr,
6311                                                 GFP_KERNEL | __GFP_ZERO);
6312
6313         if (!kernel_addr)
6314                 return -ENOMEM;
6315
6316         mutex_lock(&gaudi->clk_gate_mutex);
6317
6318         hdev->asic_funcs->disable_clock_gating(hdev);
6319
6320         hdev->asic_funcs->hw_queues_lock(hdev);
6321
6322         dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
6323         dma_offset = dma_id * DMA_CORE_OFFSET;
6324         qm_offset = dma_id * DMA_QMAN_OFFSET;
6325         dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6326         is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6327
6328         if (!is_eng_idle) {
6329                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
6330                 dma_offset = dma_id * DMA_CORE_OFFSET;
6331                 qm_offset = dma_id * DMA_QMAN_OFFSET;
6332                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
6333                 is_eng_idle = IS_DMA_IDLE(dma_core_sts0);
6334
6335                 if (!is_eng_idle) {
6336                         dev_err_ratelimited(hdev->dev,
6337                                 "Can't read via DMA because it is BUSY\n");
6338                         rc = -EAGAIN;
6339                         goto out;
6340                 }
6341         }
6342
6343         cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
6344         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
6345                         0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
6346
6347         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6348          * using the compute ctx ASID, if it exists. If not, use the
6349          * kernel ctx ASID
6350          */
6351         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6352
6353         /* Verify DMA is OK */
6354         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6355         if (err_cause) {
6356                 dev_dbg(hdev->dev,
6357                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
6358                         err_cause);
6359                 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
6360         }
6361
6362         pos = 0;
6363         size_left = size;
6364         size_to_dma = SZ_2M;
6365
6366         while (size_left > 0) {
6367
6368                 if (size_left < SZ_2M)
6369                         size_to_dma = size_left;
6370
6371                 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6372                                                 dma_addr);
6373                 if (rc)
6374                         break;
6375
6376                 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6377
6378                 if (size_left <= SZ_2M)
6379                         break;
6380
6381                 pos += SZ_2M;
6382                 addr += SZ_2M;
6383                 size_left -= SZ_2M;
6384         }
6385
6386         /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6387          * using the compute ctx ASID, if it exists. If not, use the
6388          * kernel ctx ASID
6389          */
6390         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6391                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6392
6393         WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6394
6395 out:
6396         hdev->asic_funcs->hw_queues_unlock(hdev);
6397
6398         hdev->asic_funcs->set_clock_gating(hdev);
6399
6400         mutex_unlock(&gaudi->clk_gate_mutex);
6401
6402         hdev->asic_funcs->asic_dma_free_coherent(hdev, SZ_2M, kernel_addr,
6403                                                 dma_addr);
6404
6405         return rc;
6406 }
6407
6408 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6409 {
6410         struct gaudi_device *gaudi = hdev->asic_specific;
6411
6412         if (hdev->hard_reset_pending)
6413                 return U64_MAX;
6414
6415         return readq(hdev->pcie_bar[HBM_BAR_ID] +
6416                         (addr - gaudi->hbm_bar_cur_addr));
6417 }
6418
6419 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6420 {
6421         struct gaudi_device *gaudi = hdev->asic_specific;
6422
6423         if (hdev->hard_reset_pending)
6424                 return;
6425
6426         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6427                         (addr - gaudi->hbm_bar_cur_addr));
6428 }
6429
6430 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6431 {
6432         /* mask to zero the MMBP and ASID bits */
6433         WREG32_AND(reg, ~0x7FF);
6434         WREG32_OR(reg, asid);
6435 }
6436
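/*
 * Bind the user-accessible engines to the given ASID by clearing the MMBP
 * bit and programming the ASID into every QMAN NON_SECURE_PROPS, DMA core
 * NON_SECURE_PROPS and TPC/MME ARUSER/AWUSER register. Clock gating is
 * disabled for the duration of the register writes.
 */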
6437 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6438 {
6439         struct gaudi_device *gaudi = hdev->asic_specific;
6440
6441         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6442                 return;
6443
6444         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6445                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6446                 return;
6447         }
6448
6449         mutex_lock(&gaudi->clk_gate_mutex);
6450
6451         hdev->asic_funcs->disable_clock_gating(hdev);
6452
6453         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6454         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6455         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6456         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6457         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6458
6459         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6460         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6461         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6462         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6463         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6464
6465         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6466         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6467         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6468         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6469         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6470
6471         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6472         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6473         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6474         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6475         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6476
6477         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6478         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6479         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6480         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6481         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6482
6483         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6484         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6485         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6486         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6487         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6488
6489         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6490         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6491         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6492         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6493         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6494
6495         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6496         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6497         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6498         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6499         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6500
6501         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6502         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6503         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6504         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6505         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6506         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6507         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6508         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6509
6510         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6511         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6512         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6513         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6514         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6515         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6516         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6517
6518         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6519         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6520         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6521         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6522         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6523         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6524         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6525
6526         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6527         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6528         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6529         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6530         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6531         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6532         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6533
6534         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6535         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6536         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6537         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6538         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6539         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6540         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6541
6542         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6543         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6544         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6545         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6546         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6547         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6548         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6549
6550         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6551         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6552         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6553         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6554         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6555         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6556         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6557
6558         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6559         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6560         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6561         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6562         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6563         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6564         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6565
6566         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6567         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6568         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6569         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6570         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6571         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6572         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6573
6574         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6575         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6576         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6577         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6578         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6579         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6580         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6581         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6582         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6583         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6584
6585         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6586         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6587         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6588         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6589         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6590         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6591         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6592         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6593         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6594         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6595         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6596         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6597
6598         if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6599                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6600                                 asid);
6601                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6602                                 asid);
6603                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6604                                 asid);
6605                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6606                                 asid);
6607                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6608                                 asid);
6609         }
6610
6611         if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6612                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6613                                 asid);
6614                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6615                                 asid);
6616                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6617                                 asid);
6618                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6619                                 asid);
6620                 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6621                                 asid);
6622         }
6623
6624         if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6625                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6626                                 asid);
6627                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6628                                 asid);
6629                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6630                                 asid);
6631                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6632                                 asid);
6633                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6634                                 asid);
6635         }
6636
6637         if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6638                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6639                                 asid);
6640                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6641                                 asid);
6642                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6643                                 asid);
6644                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6645                                 asid);
6646                 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6647                                 asid);
6648         }
6649
6650         if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6651                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6652                                 asid);
6653                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6654                                 asid);
6655                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6656                                 asid);
6657                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6658                                 asid);
6659                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6660                                 asid);
6661         }
6662
6663         if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6664                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6665                                 asid);
6666                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6667                                 asid);
6668                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6669                                 asid);
6670                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6671                                 asid);
6672                 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6673                                 asid);
6674         }
6675
6676         if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6677                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6678                                 asid);
6679                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6680                                 asid);
6681                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6682                                 asid);
6683                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6684                                 asid);
6685                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6686                                 asid);
6687         }
6688
6689         if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6690                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6691                                 asid);
6692                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6693                                 asid);
6694                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6695                                 asid);
6696                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6697                                 asid);
6698                 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6699                                 asid);
6700         }
6701
6702         if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6703                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6704                                 asid);
6705                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6706                                 asid);
6707                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6708                                 asid);
6709                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6710                                 asid);
6711                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6712                                 asid);
6713         }
6714
6715         if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6716                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6717                                 asid);
6718                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6719                                 asid);
6720                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6721                                 asid);
6722                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6723                                 asid);
6724                 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6725                                 asid);
6726         }
6727
6728         hdev->asic_funcs->set_clock_gating(hdev);
6729
6730         mutex_unlock(&gaudi->clk_gate_mutex);
6731 }
6732
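/*
 * gaudi_send_job_on_qman0 - submit a driver-generated job on the PCI DMA QMAN
 *
 * The device must be idle. A 4-byte fence buffer is allocated from the DMA
 * pool, and the last packet slot of the patched CB is filled with a MSG_PROT
 * packet that writes GAUDI_QMAN0_FENCE_VAL to that buffer. DMA core
 * protection is opened for the PCI DMA engine, the CB is sent without
 * completion on GAUDI_QUEUE_ID_DMA_0_0, and the driver polls the fence
 * buffer until the expected value arrives or the timeout expires; protection
 * is restored and the fence buffer freed on exit.
 */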
6733 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6734                 struct hl_cs_job *job)
6735 {
6736         struct packet_msg_prot *fence_pkt;
6737         u32 *fence_ptr;
6738         dma_addr_t fence_dma_addr;
6739         struct hl_cb *cb;
6740         u32 tmp, timeout, dma_offset;
6741         int rc;
6742
6743         if (hdev->pldm)
6744                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6745         else
6746                 timeout = HL_DEVICE_TIMEOUT_USEC;
6747
6748         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
6749                 dev_err_ratelimited(hdev->dev,
6750                         "Can't send driver job on QMAN0 because the device is not idle\n");
6751                 return -EBUSY;
6752         }
6753
6754         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
6755                                                         &fence_dma_addr);
6756         if (!fence_ptr) {
6757                 dev_err(hdev->dev,
6758                         "Failed to allocate fence memory for QMAN0\n");
6759                 return -ENOMEM;
6760         }
6761
6762         cb = job->patched_cb;
6763
6764         fence_pkt = cb->kernel_address +
6765                         job->job_cb_size - sizeof(struct packet_msg_prot);
6766
6767         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6768         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6769         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6770
6771         fence_pkt->ctl = cpu_to_le32(tmp);
6772         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6773         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6774
6775         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6776
6777         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
6778
6779         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6780                                         job->job_cb_size, cb->bus_address);
6781         if (rc) {
6782                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6783                 goto free_fence_ptr;
6784         }
6785
6786         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6787                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6788                                 timeout, true);
6789
6790         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6791
6792         if (rc == -ETIMEDOUT) {
6793                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6794                 goto free_fence_ptr;
6795         }
6796
6797 free_fence_ptr:
6798         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6799                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6800
6801         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
6802                                         fence_dma_addr);
6803         return rc;
6804 }
6805
6806 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6807 {
6808         if (event_type >= GAUDI_EVENT_SIZE)
6809                 goto event_not_supported;
6810
6811         if (!gaudi_irq_map_table[event_type].valid)
6812                 goto event_not_supported;
6813
6814         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6815
6816         return;
6817
6818 event_not_supported:
6819         snprintf(desc, size, "N/A");
6820 }
6821
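/*
 * Each DMA_IF location in the RAZWI initiator grid is shared by a pair of
 * DMA engines, so the per-engine DMA_CORE_ERR_CAUSE registers are checked to
 * tell which engine of the pair raised the HBW read/write error. If the
 * relevant cause bit is set for both or for neither, both candidates are
 * reported.
 */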
6822 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
6823                                                         u32 x_y, bool is_write)
6824 {
6825         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6826
6827         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6828                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6829
6830         switch (x_y) {
6831         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6832         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6833                 dma_id[0] = 0;
6834                 dma_id[1] = 2;
6835                 break;
6836         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6837         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6838                 dma_id[0] = 1;
6839                 dma_id[1] = 3;
6840                 break;
6841         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6842         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6843                 dma_id[0] = 4;
6844                 dma_id[1] = 6;
6845                 break;
6846         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6847         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6848                 dma_id[0] = 5;
6849                 dma_id[1] = 7;
6850                 break;
6851         default:
6852                 goto unknown_initiator;
6853         }
6854
6855         for (i = 0 ; i < 2 ; i++) {
6856                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6857                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6858         }
6859
6860         switch (x_y) {
6861         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6862         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6863                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6864                         return "DMA0";
6865                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6866                         return "DMA2";
6867                 else
6868                         return "DMA0 or DMA2";
6869         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6870         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6871                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6872                         return "DMA1";
6873                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6874                         return "DMA3";
6875                 else
6876                         return "DMA1 or DMA3";
6877         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6878         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6879                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6880                         return "DMA4";
6881                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6882                         return "DMA6";
6883                 else
6884                         return "DMA4 or DMA6";
6885         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6886         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6887                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
6888                         return "DMA5";
6889                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
6890                         return "DMA7";
6891                 else
6892                         return "DMA5 or DMA7";
6893         }
6894
6895 unknown_initiator:
6896         return "unknown initiator";
6897 }
6898
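/*
 * The RAZWI WRITE/READ ID register encodes the initiator's X/Y location on
 * the chip grid together with an AXI ID. Locations shared by several units
 * (e.g. TPC0/NIC0) are disambiguated by the AXI ID, and DMA_IF locations are
 * further resolved by gaudi_get_razwi_initiator_dma_name().
 */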
6899 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
6900                                                         bool is_write)
6901 {
6902         u32 val, x_y, axi_id;
6903
6904         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6905                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
6906         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6907                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6908         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6909                         RAZWI_INITIATOR_AXI_ID_SHIFT);
6910
6911         switch (x_y) {
6912         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6913                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6914                         return "TPC0";
6915                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6916                         return "NIC0";
6917                 break;
6918         case RAZWI_INITIATOR_ID_X_Y_TPC1:
6919                 return "TPC1";
6920         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6921         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6922                 return "MME0";
6923         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6924         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6925                 return "MME1";
6926         case RAZWI_INITIATOR_ID_X_Y_TPC2:
6927                 return "TPC2";
6928         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6929                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6930                         return "TPC3";
6931                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6932                         return "PCI";
6933                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6934                         return "CPU";
6935                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6936                         return "PSOC";
6937                 break;
6938         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6939         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6940         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6941         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6942         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6943         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6944         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6945         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6946                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
6947         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6948                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6949                         return "TPC4";
6950                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6951                         return "NIC1";
6952                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6953                         return "NIC2";
6954                 break;
6955         case RAZWI_INITIATOR_ID_X_Y_TPC5:
6956                 return "TPC5";
6957         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6958         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6959                 return "MME2";
6960         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6961         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6962                 return "MME3";
6963         case RAZWI_INITIATOR_ID_X_Y_TPC6:
6964                 return "TPC6";
6965         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6966                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
6967                         return "TPC7";
6968                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
6969                         return "NIC4";
6970                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
6971                         return "NIC5";
6972                 break;
6973         default:
6974                 break;
6975         }
6976
6977         dev_err(hdev->dev,
6978                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6979                 val,
6980                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6981                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6982                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6983                         RAZWI_INITIATOR_AXI_ID_MASK);
6984
6985         return "unknown initiator";
6986 }
6987
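/*
 * Report a latched RAZWI (illegal write or read) and clear the valid bit so
 * that a subsequent offending access can be captured.
 */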
6988 static void gaudi_print_razwi_info(struct hl_device *hdev)
6989 {
6990         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6991                 dev_err_ratelimited(hdev->dev,
6992                         "RAZWI event caused by illegal write of %s\n",
6993                         gaudi_get_razwi_initiator_name(hdev, true));
6994                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6995         }
6996
6997         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6998                 dev_err_ratelimited(hdev->dev,
6999                         "RAZWI event caused by illegal read of %s\n",
7000                         gaudi_get_razwi_initiator_name(hdev, false));
7001                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
7002         }
7003 }
7004
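/*
 * The MMU error capture register holds only bits 49:32 of the faulting VA;
 * the low 32 bits come from the matching *_CAPTURE_VA register, so the full
 * address is rebuilt as ((capture & VA_49_32_MASK) << 32) | VA_low. Writing
 * 0 to the capture register clears it for the next fault.
 */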
7005 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
7006 {
7007         struct gaudi_device *gaudi = hdev->asic_specific;
7008         u64 addr;
7009         u32 val;
7010
7011         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
7012                 return;
7013
7014         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
7015         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7016                 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
7017                 addr <<= 32;
7018                 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
7019
7020                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
7021                                         addr);
7022
7023                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
7024         }
7025
7026         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
7027         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
7028                 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
7029                 addr <<= 32;
7030                 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
7031
7032                 dev_err_ratelimited(hdev->dev,
7033                                 "MMU access error on va 0x%llx\n", addr);
7034
7035                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
7036         }
7037 }
7038
7039 /*
7040  *  +-------------------+------------------------------------------------------+
7041  *  | Configuration Reg |                     Description                      |
7042  *  |      Address      |                                                      |
7043  *  +-------------------+------------------------------------------------------+
7044  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
7045  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
7046  *  |                   |0xF34 memory wrappers 63:32                           |
7047  *  |                   |0xF38 memory wrappers 95:64                           |
7048  *  |                   |0xF3C memory wrappers 127:96                          |
7049  *  +-------------------+------------------------------------------------------+
7050  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
7051  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
7052  *  |                   |0xF44 memory wrappers 63:32                           |
7053  *  |                   |0xF48 memory wrappers 95:64                           |
7054  *  |                   |0xF4C memory wrappers 127:96                          |
7055  *  +-------------------+------------------------------------------------------+
7056  */
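/*
 * Note on the decode below: a set bit b in the i-th error-indication word
 * maps to memory wrapper index 32 * i + b (e.g. bit 5 set in the second word
 * selects wrapper 37), and that index is written to the ECC memory select
 * register to latch the matching address and syndrome.
 */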
7057 static int gaudi_extract_ecc_info(struct hl_device *hdev,
7058                 struct ecc_info_extract_params *params, u64 *ecc_address,
7059                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
7060 {
7061         struct gaudi_device *gaudi = hdev->asic_specific;
7062         u32 i, num_mem_regs, reg, err_bit;
7063         u64 err_addr, err_word = 0;
7064         int rc = 0;
7065
7066         num_mem_regs = params->num_memories / 32 +
7067                         ((params->num_memories % 32) ? 1 : 0);
7068
7069         if (params->block_address >= CFG_BASE)
7070                 params->block_address -= CFG_BASE;
7071
7072         if (params->derr)
7073                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
7074         else
7075                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
7076
7077         if (params->disable_clock_gating) {
7078                 mutex_lock(&gaudi->clk_gate_mutex);
7079                 hdev->asic_funcs->disable_clock_gating(hdev);
7080         }
7081
7082         /* Set invalid wrapper index */
7083         *memory_wrapper_idx = 0xFF;
7084
7085         /* Iterate through memory wrappers, a single bit must be set */
7086         for (i = 0 ; i < num_mem_regs ; i++) {
7087                 /* the error-indication registers are consecutive, 4 bytes apart */
7088                 err_word = RREG32(err_addr + i * 4);
7089                 if (err_word) {
7090                         err_bit = __ffs(err_word);
7091                         *memory_wrapper_idx = err_bit + (32 * i);
7092                         break;
7093                 }
7094         }
7095
7096         if (*memory_wrapper_idx == 0xFF) {
7097                 dev_err(hdev->dev, "ECC error information cannot be found\n");
7098                 rc = -EINVAL;
7099                 goto enable_clk_gate;
7100         }
7101
7102         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
7103                         *memory_wrapper_idx);
7104
7105         *ecc_address =
7106                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
7107         *ecc_syndrom =
7108                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
7109
7110         /* Clear error indication */
7111         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
7112         if (params->derr)
7113                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
7114         else
7115                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
7116
7117         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
7118
7119 enable_clk_gate:
7120         if (params->disable_clock_gating) {
7121                 hdev->asic_funcs->set_clock_gating(hdev);
7122
7123                 mutex_unlock(&gaudi->clk_gate_mutex);
7124         }
7125
7126         return rc;
7127 }
7128
7129 /*
7130  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
7131  *
7132  * @idx: the current pi/ci value
7133  * @q_len: the queue length (power of 2)
7134  *
7135  * @return the cyclically decremented index
7136  */
7137 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
7138 {
7139         u32 mask = q_len - 1;
7140
7141         /*
7142          * Modular decrement is equivalent to adding (q_len - 1); masking
7143          * with (q_len - 1) keeps the result in the range [0, q_len - 1].
7144          * For example, with q_len = 8: dec(0) = 7 and dec(5) = 4.
7145          */
7146         return (idx + q_len - 1) & mask;
7147 }
7148
7149 /**
7150  * gaudi_print_sw_config_stream_data - print SW config stream data
7151  *
7152  * @hdev: pointer to the habanalabs device structure
7153  * @stream: the QMAN's stream
7154  * @qman_base: base address of QMAN registers block
7155  */
7156 static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream,
7157                                                 u64 qman_base)
7158 {
7159         u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
7160         u32 cq_ptr_lo_off, size;
7161
7162         cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
7163
7164         cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
7165                                                 stream * cq_ptr_lo_off;
7166         cq_ptr_hi = cq_ptr_lo +
7167                                 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
7168         cq_tsize = cq_ptr_lo +
7169                                 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
7170
7171         cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
7172         size = RREG32(cq_tsize);
7173         dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
7174                                                         stream, cq_ptr, size);
7175 }
7176
7177 /**
7178  * gaudi_print_last_pqes_on_err - print last PQEs on error
7179  *
7180  * @hdev: pointer to the habanalabs device structure
7181  * @qid_base: first QID of the QMAN (out of 4 streams)
7182  * @stream: the QMAN's stream
7183  * @qman_base: base address of QMAN registers block
7184  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
7185  */
7186 static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
7187                                                 u32 stream, u64 qman_base,
7188                                                 bool pr_sw_conf)
7189 {
7190         u32 ci, qm_ci_stream_off, queue_len;
7191         struct hl_hw_queue *q;
7192         u64 pq_ci;
7193         int i;
7194
7195         q = &hdev->kernel_queues[qid_base + stream];
7196
7197         qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
7198         pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
7199                                                 stream * qm_ci_stream_off;
7200
7201         queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
7202                                         q->int_queue_len : HL_QUEUE_LENGTH;
7203
7204         hdev->asic_funcs->hw_queues_lock(hdev);
7205
7206         if (pr_sw_conf)
7207                 gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7208
7209         ci = RREG32(pq_ci);
7210
7211         /* we should start printing from ci - 1 */
7212         ci = gaudi_queue_idx_dec(ci, queue_len);
7213
7214         for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
7215                 struct hl_bd *bd;
7216                 u64 addr;
7217                 u32 len;
7218
7219                 bd = q->kernel_address;
7220                 bd += ci;
7221
7222                 len = le32_to_cpu(bd->len);
7223                 /* len 0 means an uninitialized entry - break */
7224                 if (!len)
7225                         break;
7226
7227                 addr = le64_to_cpu(bd->ptr);
7228
7229                 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
7230                                                         stream, ci, addr, len);
7231
7232                 /* get previous ci, wrap if needed */
7233                 ci = gaudi_queue_idx_dec(ci, queue_len);
7234         }
7235
7236         hdev->asic_funcs->hw_queues_unlock(hdev);
7237 }
7238
7239 /**
7240  * print_qman_data_on_err - extract QMAN data on error
7241  *
7242  * @hdev: pointer to the habanalabs device structure
7243  * @qid_base: first QID of the QMAN (out of 4 streams)
7244  * @stream: the QMAN's stream
7245  * @qman_base: base address of QMAN registers block
7246  *
7247  * This function attempts to extract as much data as possible on a QMAN error.
7248  * On upper CP print the SW config stream data and last 8 PQEs.
7249  * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
7250  */
7251 static void print_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
7252                                                 u32 stream, u64 qman_base)
7253 {
7254         u32 i;
7255
7256         if (stream != QMAN_STREAMS) {
7257                 gaudi_print_last_pqes_on_err(hdev, qid_base, stream, qman_base,
7258                                                                         true);
7259                 return;
7260         }
7261
7262         gaudi_print_sw_config_stream_data(hdev, stream, qman_base);
7263
7264         for (i = 0; i < QMAN_STREAMS; i++)
7265                 gaudi_print_last_pqes_on_err(hdev, qid_base, i, qman_base,
7266                                                                         false);
7267 }
7268
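/*
 * gaudi_handle_qman_err_generic - decode and report QMAN error causes
 *
 * Walk GLBL_STS1 of the four upper-CP streams plus the lower CP, print every
 * asserted error-cause bit, and either clear the bits (write-1-to-clear) when
 * the driver is not configured to stop on error, or dump the QMAN stream data
 * for post-mortem when it is. Arbiter errors are reported from ARB_ERR_CAUSE.
 */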
7269 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
7270                                           const char *qm_name,
7271                                           u64 qman_base,
7272                                           u32 qid_base)
7273 {
7274         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
7275         u64 glbl_sts_addr, arb_err_addr;
7276         char reg_desc[32];
7277
7278         glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
7279         arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
7280
7281         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
7282         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7283                 glbl_sts_clr_val = 0;
7284                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7285
7286                 if (!glbl_sts_val)
7287                         continue;
7288
7289                 if (i == QMAN_STREAMS)
7290                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
7291                 else
7292                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7293
7294                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
7295                         if (glbl_sts_val & BIT(j)) {
7296                                 dev_err_ratelimited(hdev->dev,
7297                                                 "%s %s. err cause: %s\n",
7298                                                 qm_name, reg_desc,
7299                                                 gaudi_qman_error_cause[j]);
7300                                 glbl_sts_clr_val |= BIT(j);
7301                         }
7302                 }
7303
7304                 /* Write 1 to clear the errors */
7305                 if (!hdev->stop_on_err)
7306                         WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
7307                 else
7308                         print_qman_data_on_err(hdev, qid_base, i, qman_base);
7309         }
7310
7311         arb_err_val = RREG32(arb_err_addr);
7312
7313         if (!arb_err_val)
7314                 return;
7315
7316         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7317                 if (arb_err_val & BIT(j)) {
7318                         dev_err_ratelimited(hdev->dev,
7319                                         "%s ARB_ERR. err cause: %s\n",
7320                                         qm_name,
7321                                         gaudi_qman_arb_error_cause[j]);
7322                 }
7323         }
7324 }
7325
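/*
 * Decode a sync manager SEI (system error interrupt) event. The meaning of
 * sei_log depends on the cause: the overflowing SO number, the monitor agent
 * address low bits for an unaligned LBW access, or the AXI ID of the failed
 * response.
 */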
7326 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7327                 struct hl_eq_sm_sei_data *sei_data)
7328 {
7329         u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7330
7331         switch (sei_data->sei_cause) {
7332         case SM_SEI_SO_OVERFLOW:
7333                 dev_err(hdev->dev,
7334                         "SM %u SEI Error: SO %u overflow/underflow",
7335                         index, le32_to_cpu(sei_data->sei_log));
7336                 break;
7337         case SM_SEI_LBW_4B_UNALIGNED:
7338                 dev_err(hdev->dev,
7339                         "SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7340                         index, le32_to_cpu(sei_data->sei_log));
7341                 break;
7342         case SM_SEI_AXI_RESPONSE_ERR:
7343                 dev_err(hdev->dev,
7344                         "SM %u SEI Error: AXI ID %u response error",
7345                         index, le32_to_cpu(sei_data->sei_log));
7346                 break;
7347         default:
7348                 dev_err(hdev->dev, "Unknown SM SEI cause %u",
7349                                 le32_to_cpu(sei_data->sei_log));
7350                 break;
7351         }
7352 }
7353
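/*
 * For events whose ECC details are delivered by the firmware, the address,
 * syndrome and wrapper index are taken directly from the event queue entry.
 * For the TPC/MME events handled below, the driver extracts them itself from
 * the block's ECC capture registers via gaudi_extract_ecc_info().
 */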
7354 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7355                 struct hl_eq_ecc_data *ecc_data)
7356 {
7357         struct ecc_info_extract_params params;
7358         u64 ecc_address = 0, ecc_syndrom = 0;
7359         u8 index, memory_wrapper_idx = 0;
7360         bool extract_info_from_fw;
7361         int rc;
7362
7363         switch (event_type) {
7364         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7365         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7366                 extract_info_from_fw = true;
7367                 break;
7368         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7369                 index = event_type - GAUDI_EVENT_TPC0_SERR;
7370                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7371                 params.num_memories = 90;
7372                 params.derr = false;
7373                 params.disable_clock_gating = true;
7374                 extract_info_from_fw = false;
7375                 break;
7376         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7377                 index = event_type - GAUDI_EVENT_TPC0_DERR;
7378                 params.block_address =
7379                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7380                 params.num_memories = 90;
7381                 params.derr = true;
7382                 params.disable_clock_gating = true;
7383                 extract_info_from_fw = false;
7384                 break;
7385         case GAUDI_EVENT_MME0_ACC_SERR:
7386         case GAUDI_EVENT_MME1_ACC_SERR:
7387         case GAUDI_EVENT_MME2_ACC_SERR:
7388         case GAUDI_EVENT_MME3_ACC_SERR:
7389                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7390                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7391                 params.num_memories = 128;
7392                 params.derr = false;
7393                 params.disable_clock_gating = true;
7394                 extract_info_from_fw = false;
7395                 break;
7396         case GAUDI_EVENT_MME0_ACC_DERR:
7397         case GAUDI_EVENT_MME1_ACC_DERR:
7398         case GAUDI_EVENT_MME2_ACC_DERR:
7399         case GAUDI_EVENT_MME3_ACC_DERR:
7400                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7401                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7402                 params.num_memories = 128;
7403                 params.derr = true;
7404                 params.disable_clock_gating = true;
7405                 extract_info_from_fw = false;
7406                 break;
7407         case GAUDI_EVENT_MME0_SBAB_SERR:
7408         case GAUDI_EVENT_MME1_SBAB_SERR:
7409         case GAUDI_EVENT_MME2_SBAB_SERR:
7410         case GAUDI_EVENT_MME3_SBAB_SERR:
7411                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7412                 params.block_address =
7413                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7414                 params.num_memories = 33;
7415                 params.derr = false;
7416                 params.disable_clock_gating = true;
7417                 extract_info_from_fw = false;
7418                 break;
7419         case GAUDI_EVENT_MME0_SBAB_DERR:
7420         case GAUDI_EVENT_MME1_SBAB_DERR:
7421         case GAUDI_EVENT_MME2_SBAB_DERR:
7422         case GAUDI_EVENT_MME3_SBAB_DERR:
7423                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7424                 params.block_address =
7425                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7426                 params.num_memories = 33;
7427                 params.derr = true;
7428                 params.disable_clock_gating = true;
7429                 extract_info_from_fw = false;
7430                 break;
7431         default:
7432                 return;
7433         }
7434
7435         if (extract_info_from_fw) {
7436                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7437                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7438                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7439         } else {
7440                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7441                                 &ecc_syndrom, &memory_wrapper_idx);
7442                 if (rc)
7443                         return;
7444         }
7445
7446         dev_err(hdev->dev,
7447                 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7448                 ecc_address, ecc_syndrom, memory_wrapper_idx);
7449 }
7450
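/*
 * Map the QMAN error event to the engine's QMAN register base and its first
 * queue ID (four streams per QMAN), then let the generic handler decode the
 * error-cause registers. For DMA engines above DMA1 the queue ID base is
 * bumped by one to skip GAUDI_QUEUE_ID_CPU_PQ.
 */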
7451 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
7452 {
7453         u64 qman_base;
7454         char desc[32];
7455         u32 qid_base;
7456         u8 index;
7457
7458         switch (event_type) {
7459         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7460                 index = event_type - GAUDI_EVENT_TPC0_QM;
7461                 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7462                 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7463                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7464                 break;
7465         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7466                 index = event_type - GAUDI_EVENT_MME0_QM;
7467                 qid_base = GAUDI_QUEUE_ID_MME_0_0 + index * QMAN_STREAMS;
7468                 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7469                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7470                 break;
7471         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7472                 index = event_type - GAUDI_EVENT_DMA0_QM;
7473                 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7474                 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7475                 if (index > 1)
7476                         qid_base++;
7477                 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7478                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7479                 break;
7480         case GAUDI_EVENT_NIC0_QM0:
7481                 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7482                 qman_base = mmNIC0_QM0_BASE;
7483                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7484                 break;
7485         case GAUDI_EVENT_NIC0_QM1:
7486                 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7487                 qman_base = mmNIC0_QM1_BASE;
7488                 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7489                 break;
7490         case GAUDI_EVENT_NIC1_QM0:
7491                 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7492                 qman_base = mmNIC1_QM0_BASE;
7493                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7494                 break;
7495         case GAUDI_EVENT_NIC1_QM1:
7496                 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7497                 qman_base = mmNIC1_QM1_BASE;
7498                 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7499                 break;
7500         case GAUDI_EVENT_NIC2_QM0:
7501                 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7502                 qman_base = mmNIC2_QM0_BASE;
7503                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7504                 break;
7505         case GAUDI_EVENT_NIC2_QM1:
7506                 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7507                 qman_base = mmNIC2_QM1_BASE;
7508                 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7509                 break;
7510         case GAUDI_EVENT_NIC3_QM0:
7511                 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7512                 qman_base = mmNIC3_QM0_BASE;
7513                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7514                 break;
7515         case GAUDI_EVENT_NIC3_QM1:
7516                 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7517                 qman_base = mmNIC3_QM1_BASE;
7518                 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7519                 break;
7520         case GAUDI_EVENT_NIC4_QM0:
7521                 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7522                 qman_base = mmNIC4_QM0_BASE;
7523                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7524                 break;
7525         case GAUDI_EVENT_NIC4_QM1:
7526                 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7527                 qman_base = mmNIC4_QM1_BASE;
7528                 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7529                 break;
7530         default:
7531                 return;
7532         }
7533
7534         gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base);
7535 }
7536
7537 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7538                                         bool razwi)
7539 {
7540         char desc[64] = "";
7541
7542         gaudi_get_event_desc(event_type, desc, sizeof(desc));
7543         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7544                 event_type, desc);
7545
7546         if (razwi) {
7547                 gaudi_print_razwi_info(hdev);
7548                 gaudi_print_mmu_error_info(hdev);
7549         }
7550 }
7551
7552 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7553                                         struct cpucp_pkt_sync_err *sync_err)
7554 {
7555         struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7556
7557         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
7558                         sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
7559 }
7560
7561 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7562                                         struct hl_eq_fw_alive *fw_alive)
7563 {
7564         dev_err(hdev->dev,
7565                 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7566                 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
7567                 "Minor" : "Critical", fw_alive->process_id,
7568                 fw_alive->thread_id, fw_alive->uptime_seconds);
7569 }
7570
7571 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
7572 {
7573         struct gaudi_device *gaudi = hdev->asic_specific;
7574
7575         /* Unmask all IRQs since some could have been received
7576          * during the soft reset
7577          */
7578         return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
7579 }
7580
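/*
 * Prefer the HBM ECC data supplied by the firmware when it reports HBM ECC
 * handling as enabled. Otherwise, and only when firmware security is off,
 * read the HBM memory-controller registers directly, report and clear any
 * per-pseudo-channel parity/ECC indications, and finally check the MC SRAM
 * SERR/DERR status words.
 */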
7581 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7582                         struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7583 {
7584         u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7585         int rc = 0;
7586
7587         if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7588                                         CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7589                 if (!hbm_ecc_data) {
7590                         dev_err(hdev->dev, "No FW ECC data");
7591                         return 0;
7592                 }
7593
7594                 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7595                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7596                 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7597                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7598                 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7599                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7600                 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7601                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7602                 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7603                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7604                 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7605                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7606                 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7607                                 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7608
7609                 dev_err(hdev->dev,
7610                         "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7611                         device, ch, wr_par, rd_par, ca_par, serr, derr);
7612                 dev_err(hdev->dev,
7613                         "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7614                         device, ch, hbm_ecc_data->first_addr, type,
7615                         hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7616                         hbm_ecc_data->dec_cnt);
7617                 return 0;
7618         }
7619
7620         if (hdev->asic_prop.fw_security_enabled) {
7621                 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7622                 return 0;
7623         }
7624
7625         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7626         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7627                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7628                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7629                 if (val) {
7630                         rc = -EIO;
7631                         dev_err(hdev->dev,
7632                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7633                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7634                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7635                                 (val >> 4) & 0x1);
7636
7637                         val2 = RREG32(base + ch * 0x1000 + 0x060);
7638                         dev_err(hdev->dev,
7639                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7640                                 device, ch * 2,
7641                                 RREG32(base + ch * 0x1000 + 0x064),
7642                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7643                                 (val2 & 0xFF0000) >> 16,
7644                                 (val2 & 0xFF000000) >> 24);
7645                 }
7646
7647                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7648                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7649                 if (val) {
7650                         rc = -EIO;
7651                         dev_err(hdev->dev,
7652                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7653                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7654                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
7655                                 (val >> 4) & 0x1);
7656
7657                         val2 = RREG32(base + ch * 0x1000 + 0x070);
7658                         dev_err(hdev->dev,
7659                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7660                                 device, ch * 2 + 1,
7661                                 RREG32(base + ch * 0x1000 + 0x074),
7662                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7663                                 (val2 & 0xFF0000) >> 16,
7664                                 (val2 & 0xFF000000) >> 24);
7665                 }
7666
7667                 /* Clear interrupts */
7668                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7669                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7670                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7671                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7672                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7673                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7674         }
7675
7676         val  = RREG32(base + 0x8F30);
7677         val2 = RREG32(base + 0x8F34);
7678         if (val | val2) {
7679                 rc = -EIO;
7680                 dev_err(hdev->dev,
7681                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7682                         device, val, val2);
7683         }
7684         val  = RREG32(base + 0x8F40);
7685         val2 = RREG32(base + 0x8F44);
7686         if (val | val2) {
7687                 rc = -EIO;
7688                 dev_err(hdev->dev,
7689                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7690                         device, val, val2);
7691         }
7692
7693         return rc;
7694 }
7695
7696 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7697 {
7698         switch (hbm_event_type) {
7699         case GAUDI_EVENT_HBM0_SPI_0:
7700         case GAUDI_EVENT_HBM0_SPI_1:
7701                 return 0;
7702         case GAUDI_EVENT_HBM1_SPI_0:
7703         case GAUDI_EVENT_HBM1_SPI_1:
7704                 return 1;
7705         case GAUDI_EVENT_HBM2_SPI_0:
7706         case GAUDI_EVENT_HBM2_SPI_1:
7707                 return 2;
7708         case GAUDI_EVENT_HBM3_SPI_0:
7709         case GAUDI_EVENT_HBM3_SPI_1:
7710                 return 3;
7711         default:
7712                 break;
7713         }
7714
7715         /* Should never happen */
7716         return 0;
7717 }
7718
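/*
 * Read and clear the TPC interrupt cause register of the given TPC engine,
 * log every pending cause, and report whether a soft reset is required
 * (i.e. a QM error was detected).
 */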
7719 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7720                                         char *interrupt_name)
7721 {
7722         struct gaudi_device *gaudi = hdev->asic_specific;
7723         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7724         bool soft_reset_required = false;
7725
7726         /* Accessing the TPC_INTR_CAUSE registers requires disabling clock
7727          * gating, which cannot be done by the CPU-CP firmware, so the driver
7728          * does it instead.
7729          */
7730
7731         mutex_lock(&gaudi->clk_gate_mutex);
7732
7733         hdev->asic_funcs->disable_clock_gating(hdev);
7734
7735         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7736                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7737
7738         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7739                 if (tpc_interrupts_cause & BIT(i)) {
7740                         dev_err_ratelimited(hdev->dev,
7741                                         "TPC%d_%s interrupt cause: %s\n",
7742                                         tpc_id, interrupt_name,
7743                                         gaudi_tpc_interrupts_cause[i]);
7744                         /* If this is a QM error, we need to soft-reset */
7745                         if (i == 15)
7746                                 soft_reset_required = true;
7747                 }
7748
7749         /* Clear interrupts */
7750         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7751
7752         hdev->asic_funcs->set_clock_gating(hdev);
7753
7754         mutex_unlock(&gaudi->clk_gate_mutex);
7755
7756         return soft_reset_required;
7757 }
7758
7759 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7760 {
7761         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7762 }
7763
7764 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7765 {
7766         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7767 }
7768
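/*
 * Track and log clock throttling events reported by the firmware, updating
 * hdev->clk_throttling_reason with the power/thermal cause bits.
 */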
7769 static void gaudi_print_clk_change_info(struct hl_device *hdev,
7770                                         u16 event_type)
7771 {
7772         switch (event_type) {
7773         case GAUDI_EVENT_FIX_POWER_ENV_S:
7774                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
7775                 dev_info_ratelimited(hdev->dev,
7776                         "Clock throttling due to power consumption\n");
7777                 break;
7778
7779         case GAUDI_EVENT_FIX_POWER_ENV_E:
7780                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
7781                 dev_info_ratelimited(hdev->dev,
7782                         "Power envelope is safe, back to optimal clock\n");
7783                 break;
7784
7785         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7786                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
7787                 dev_info_ratelimited(hdev->dev,
7788                         "Clock throttling due to overheating\n");
7789                 break;
7790
7791         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7792                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
7793                 dev_info_ratelimited(hdev->dev,
7794                         "Thermal envelope is safe, back to optimal clock\n");
7795                 break;
7796
7797         default:
7798                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7799                         event_type);
7800                 break;
7801         }
7802 }
7803
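/*
 * Event queue handler: dispatch a firmware event entry to the relevant
 * handler, unmask the interrupt when the event is recoverable, or trigger
 * a hard reset for fatal errors.
 */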
7804 static void gaudi_handle_eqe(struct hl_device *hdev,
7805                                 struct hl_eq_entry *eq_entry)
7806 {
7807         struct gaudi_device *gaudi = hdev->asic_specific;
7808         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7809         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7810                         >> EQ_CTL_EVENT_TYPE_SHIFT);
7811         u8 cause;
7812         bool reset_required;
7813
7814         gaudi->events_stat[event_type]++;
7815         gaudi->events_stat_aggregate[event_type]++;
7816
7817         switch (event_type) {
7818         case GAUDI_EVENT_PCIE_CORE_DERR:
7819         case GAUDI_EVENT_PCIE_IF_DERR:
7820         case GAUDI_EVENT_PCIE_PHY_DERR:
7821         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7822         case GAUDI_EVENT_MME0_ACC_DERR:
7823         case GAUDI_EVENT_MME0_SBAB_DERR:
7824         case GAUDI_EVENT_MME1_ACC_DERR:
7825         case GAUDI_EVENT_MME1_SBAB_DERR:
7826         case GAUDI_EVENT_MME2_ACC_DERR:
7827         case GAUDI_EVENT_MME2_SBAB_DERR:
7828         case GAUDI_EVENT_MME3_ACC_DERR:
7829         case GAUDI_EVENT_MME3_SBAB_DERR:
7830         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7831                 fallthrough;
7832         case GAUDI_EVENT_CPU_IF_ECC_DERR:
7833         case GAUDI_EVENT_PSOC_MEM_DERR:
7834         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7835         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7836         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7837         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7838         case GAUDI_EVENT_MMU_DERR:
7839         case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7840                 gaudi_print_irq_info(hdev, event_type, true);
7841                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7842                 goto reset_device;
7843
7844         case GAUDI_EVENT_GIC500:
7845         case GAUDI_EVENT_AXI_ECC:
7846         case GAUDI_EVENT_L2_RAM_ECC:
7847         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7848                 gaudi_print_irq_info(hdev, event_type, false);
7849                 goto reset_device;
7850
7851         case GAUDI_EVENT_HBM0_SPI_0:
7852         case GAUDI_EVENT_HBM1_SPI_0:
7853         case GAUDI_EVENT_HBM2_SPI_0:
7854         case GAUDI_EVENT_HBM3_SPI_0:
7855                 gaudi_print_irq_info(hdev, event_type, false);
7856                 gaudi_hbm_read_interrupts(hdev,
7857                                 gaudi_hbm_event_to_dev(event_type),
7858                                 &eq_entry->hbm_ecc_data);
7859                 goto reset_device;
7860
7861         case GAUDI_EVENT_HBM0_SPI_1:
7862         case GAUDI_EVENT_HBM1_SPI_1:
7863         case GAUDI_EVENT_HBM2_SPI_1:
7864         case GAUDI_EVENT_HBM3_SPI_1:
7865                 gaudi_print_irq_info(hdev, event_type, false);
7866                 gaudi_hbm_read_interrupts(hdev,
7867                                 gaudi_hbm_event_to_dev(event_type),
7868                                 &eq_entry->hbm_ecc_data);
7869                 hl_fw_unmask_irq(hdev, event_type);
7870                 break;
7871
7872         case GAUDI_EVENT_TPC0_DEC:
7873         case GAUDI_EVENT_TPC1_DEC:
7874         case GAUDI_EVENT_TPC2_DEC:
7875         case GAUDI_EVENT_TPC3_DEC:
7876         case GAUDI_EVENT_TPC4_DEC:
7877         case GAUDI_EVENT_TPC5_DEC:
7878         case GAUDI_EVENT_TPC6_DEC:
7879         case GAUDI_EVENT_TPC7_DEC:
7880                 gaudi_print_irq_info(hdev, event_type, true);
7881                 reset_required = gaudi_tpc_read_interrupts(hdev,
7882                                         tpc_dec_event_to_tpc_id(event_type),
7883                                         "AXI_SLV_DEC_Error");
7884                 if (reset_required) {
7885                         dev_err(hdev->dev, "hard reset required due to %s\n",
7886                                 gaudi_irq_map_table[event_type].name);
7887
7888                         goto reset_device;
7889                 } else {
7890                         hl_fw_unmask_irq(hdev, event_type);
7891                 }
7892                 break;
7893
7894         case GAUDI_EVENT_TPC0_KRN_ERR:
7895         case GAUDI_EVENT_TPC1_KRN_ERR:
7896         case GAUDI_EVENT_TPC2_KRN_ERR:
7897         case GAUDI_EVENT_TPC3_KRN_ERR:
7898         case GAUDI_EVENT_TPC4_KRN_ERR:
7899         case GAUDI_EVENT_TPC5_KRN_ERR:
7900         case GAUDI_EVENT_TPC6_KRN_ERR:
7901         case GAUDI_EVENT_TPC7_KRN_ERR:
7902                 gaudi_print_irq_info(hdev, event_type, true);
7903                 reset_required = gaudi_tpc_read_interrupts(hdev,
7904                                         tpc_krn_event_to_tpc_id(event_type),
7905                                         "KRN_ERR");
7906                 if (reset_required) {
7907                         dev_err(hdev->dev, "hard reset required due to %s\n",
7908                                 gaudi_irq_map_table[event_type].name);
7909
7910                         goto reset_device;
7911                 } else {
7912                         hl_fw_unmask_irq(hdev, event_type);
7913                 }
7914                 break;
7915
7916         case GAUDI_EVENT_PCIE_CORE_SERR:
7917         case GAUDI_EVENT_PCIE_IF_SERR:
7918         case GAUDI_EVENT_PCIE_PHY_SERR:
7919         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7920         case GAUDI_EVENT_MME0_ACC_SERR:
7921         case GAUDI_EVENT_MME0_SBAB_SERR:
7922         case GAUDI_EVENT_MME1_ACC_SERR:
7923         case GAUDI_EVENT_MME1_SBAB_SERR:
7924         case GAUDI_EVENT_MME2_ACC_SERR:
7925         case GAUDI_EVENT_MME2_SBAB_SERR:
7926         case GAUDI_EVENT_MME3_ACC_SERR:
7927         case GAUDI_EVENT_MME3_SBAB_SERR:
7928         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7929         case GAUDI_EVENT_CPU_IF_ECC_SERR:
7930         case GAUDI_EVENT_PSOC_MEM_SERR:
7931         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7932         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7933         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7934         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7935                 fallthrough;
7936         case GAUDI_EVENT_MMU_SERR:
7937                 gaudi_print_irq_info(hdev, event_type, true);
7938                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7939                 hl_fw_unmask_irq(hdev, event_type);
7940                 break;
7941
7942         case GAUDI_EVENT_PCIE_DEC:
7943         case GAUDI_EVENT_MME0_WBC_RSP:
7944         case GAUDI_EVENT_MME0_SBAB0_RSP:
7945         case GAUDI_EVENT_MME1_WBC_RSP:
7946         case GAUDI_EVENT_MME1_SBAB0_RSP:
7947         case GAUDI_EVENT_MME2_WBC_RSP:
7948         case GAUDI_EVENT_MME2_SBAB0_RSP:
7949         case GAUDI_EVENT_MME3_WBC_RSP:
7950         case GAUDI_EVENT_MME3_SBAB0_RSP:
7951         case GAUDI_EVENT_CPU_AXI_SPLITTER:
7952         case GAUDI_EVENT_PSOC_AXI_DEC:
7953         case GAUDI_EVENT_PSOC_PRSTN_FALL:
7954         case GAUDI_EVENT_MMU_PAGE_FAULT:
7955         case GAUDI_EVENT_MMU_WR_PERM:
7956         case GAUDI_EVENT_RAZWI_OR_ADC:
7957         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7958         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7959         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7960                 fallthrough;
7961         case GAUDI_EVENT_NIC0_QM0:
7962         case GAUDI_EVENT_NIC0_QM1:
7963         case GAUDI_EVENT_NIC1_QM0:
7964         case GAUDI_EVENT_NIC1_QM1:
7965         case GAUDI_EVENT_NIC2_QM0:
7966         case GAUDI_EVENT_NIC2_QM1:
7967         case GAUDI_EVENT_NIC3_QM0:
7968         case GAUDI_EVENT_NIC3_QM1:
7969         case GAUDI_EVENT_NIC4_QM0:
7970         case GAUDI_EVENT_NIC4_QM1:
7971         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7972                 gaudi_print_irq_info(hdev, event_type, true);
7973                 gaudi_handle_qman_err(hdev, event_type);
7974                 hl_fw_unmask_irq(hdev, event_type);
7975                 break;
7976
7977         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7978                 gaudi_print_irq_info(hdev, event_type, true);
7979                 goto reset_device;
7980
7981         case GAUDI_EVENT_TPC0_BMON_SPMU:
7982         case GAUDI_EVENT_TPC1_BMON_SPMU:
7983         case GAUDI_EVENT_TPC2_BMON_SPMU:
7984         case GAUDI_EVENT_TPC3_BMON_SPMU:
7985         case GAUDI_EVENT_TPC4_BMON_SPMU:
7986         case GAUDI_EVENT_TPC5_BMON_SPMU:
7987         case GAUDI_EVENT_TPC6_BMON_SPMU:
7988         case GAUDI_EVENT_TPC7_BMON_SPMU:
7989         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7990                 gaudi_print_irq_info(hdev, event_type, false);
7991                 hl_fw_unmask_irq(hdev, event_type);
7992                 break;
7993
7994         case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7995                 gaudi_print_irq_info(hdev, event_type, false);
7996                 gaudi_print_sm_sei_info(hdev, event_type,
7997                                         &eq_entry->sm_sei_data);
7998                 hl_fw_unmask_irq(hdev, event_type);
7999                 break;
8000
8001         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
8002                 gaudi_print_clk_change_info(hdev, event_type);
8003                 hl_fw_unmask_irq(hdev, event_type);
8004                 break;
8005
8006         case GAUDI_EVENT_PSOC_GPIO_U16_0:
8007                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
8008                 dev_err(hdev->dev,
8009                         "Received high temp H/W interrupt %d (cause %d)\n",
8010                         event_type, cause);
8011                 break;
8012
8013         case GAUDI_EVENT_DEV_RESET_REQ:
8014                 gaudi_print_irq_info(hdev, event_type, false);
8015                 goto reset_device;
8016
8017         case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
8018                 gaudi_print_irq_info(hdev, event_type, false);
8019                 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
8020                 goto reset_device;
8021
8022         case GAUDI_EVENT_FW_ALIVE_S:
8023                 gaudi_print_irq_info(hdev, event_type, false);
8024                 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
8025                 goto reset_device;
8026
8027         default:
8028                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
8029                                 event_type);
8030                 break;
8031         }
8032
8033         return;
8034
8035 reset_device:
8036         if (hdev->hard_reset_on_fw_events)
8037                 hl_device_reset(hdev, HL_RESET_HARD);
8038         else
8039                 hl_fw_unmask_irq(hdev, event_type);
8040 }
8041
8042 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
8043                                         u32 *size)
8044 {
8045         struct gaudi_device *gaudi = hdev->asic_specific;
8046
8047         if (aggregate) {
8048                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
8049                 return gaudi->events_stat_aggregate;
8050         }
8051
8052         *size = (u32) sizeof(gaudi->events_stat);
8053         return gaudi->events_stat;
8054 }
8055
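/*
 * Invalidate the MMU STLB cache (L0 & L1). On timeout the device is
 * hard-reset, since the MMU state can no longer be trusted.
 */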
8056 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
8057                                         u32 flags)
8058 {
8059         struct gaudi_device *gaudi = hdev->asic_specific;
8060         u32 status, timeout_usec;
8061         int rc;
8062
8063         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
8064                 hdev->hard_reset_pending)
8065                 return 0;
8066
8067         if (hdev->pldm)
8068                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8069         else
8070                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8071
8072         /* L0 & L1 invalidation */
8073         WREG32(mmSTLB_INV_PS, 3);
8074         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
8075         WREG32(mmSTLB_INV_PS, 2);
8076
8077         rc = hl_poll_timeout(
8078                 hdev,
8079                 mmSTLB_INV_PS,
8080                 status,
8081                 !status,
8082                 1000,
8083                 timeout_usec);
8084
8085         WREG32(mmSTLB_INV_SET, 0);
8086
8087         if (rc) {
8088                 dev_err_ratelimited(hdev->dev,
8089                                         "MMU cache invalidation timeout\n");
8090                 hl_device_reset(hdev, HL_RESET_HARD);
8091         }
8092
8093         return rc;
8094 }
8095
8096 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
8097                                                 bool is_hard, u32 flags,
8098                                                 u32 asid, u64 va, u64 size)
8099 {
8100         /* Treat as invalidate all because there is no range invalidation
8101          * in Gaudi
8102          */
8103         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
8104 }
8105
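/*
 * Program the hop0 page-table physical address of the given ASID and wait
 * for the MMU to acknowledge the update.
 */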
8106 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
8107                                         u32 asid, u64 phys_addr)
8108 {
8109         u32 status, timeout_usec;
8110         int rc;
8111
8112         if (hdev->pldm)
8113                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
8114         else
8115                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
8116
8117         WREG32(MMU_ASID, asid);
8118         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
8119         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
8120         WREG32(MMU_BUSY, 0x80000000);
8121
8122         rc = hl_poll_timeout(
8123                 hdev,
8124                 MMU_BUSY,
8125                 status,
8126                 !(status & 0x80000000),
8127                 1000,
8128                 timeout_usec);
8129
8130         if (rc) {
8131                 dev_err(hdev->dev,
8132                         "Timeout during MMU hop0 config of asid %d\n", asid);
8133                 return rc;
8134         }
8135
8136         return 0;
8137 }
8138
8139 static int gaudi_send_heartbeat(struct hl_device *hdev)
8140 {
8141         struct gaudi_device *gaudi = hdev->asic_specific;
8142
8143         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8144                 return 0;
8145
8146         return hl_fw_send_heartbeat(hdev);
8147 }
8148
8149 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8150 {
8151         struct gaudi_device *gaudi = hdev->asic_specific;
8152         struct asic_fixed_properties *prop = &hdev->asic_prop;
8153         int rc;
8154
8155         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8156                 return 0;
8157
8158         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8159                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8160                                         mmCPU_BOOT_ERR1);
8161         if (rc)
8162                 return rc;
8163
8164         if (!strlen(prop->cpucp_info.card_name))
8165                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8166                                 CARD_NAME_MAX_LEN);
8167
8168         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8169
8170         set_default_power_values(hdev);
8171
8172         hdev->max_power = prop->max_power_default;
8173
8174         return 0;
8175 }
8176
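/*
 * Check whether all DMA, TPC, MME and NIC engines are idle. Busy engines
 * are marked in mask_arr, and per-engine status is printed to the debugfs
 * seq file when one is provided.
 */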
8177 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
8178                                         u8 mask_len, struct seq_file *s)
8179 {
8180         struct gaudi_device *gaudi = hdev->asic_specific;
8181         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8182         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8183         const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8184         unsigned long *mask = (unsigned long *)mask_arr;
8185         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8186         bool is_idle = true, is_eng_idle, is_slave;
8187         u64 offset;
8188         int i, dma_id, port;
8189
8190         mutex_lock(&gaudi->clk_gate_mutex);
8191
8192         hdev->asic_funcs->disable_clock_gating(hdev);
8193
8194         if (s)
8195                 seq_puts(s,
8196                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8197                         "---  -------  ------------  ----------  -------------\n");
8198
8199         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8200                 dma_id = gaudi_dma_assignment[i];
8201                 offset = dma_id * DMA_QMAN_OFFSET;
8202
8203                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8204                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8205                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8206                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8207                                 IS_DMA_IDLE(dma_core_sts0);
8208                 is_idle &= is_eng_idle;
8209
8210                 if (mask && !is_eng_idle)
8211                         set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8212                 if (s)
8213                         seq_printf(s, fmt, dma_id,
8214                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8215                                 qm_cgm_sts, dma_core_sts0);
8216         }
8217
8218         if (s)
8219                 seq_puts(s,
8220                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8221                         "---  -------  ------------  ----------  ----------\n");
8222
8223         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8224                 offset = i * TPC_QMAN_OFFSET;
8225                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8226                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8227                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8228                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8229                                 IS_TPC_IDLE(tpc_cfg_sts);
8230                 is_idle &= is_eng_idle;
8231
8232                 if (mask && !is_eng_idle)
8233                         set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8234                 if (s)
8235                         seq_printf(s, fmt, i,
8236                                 is_eng_idle ? "Y" : "N",
8237                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8238         }
8239
8240         if (s)
8241                 seq_puts(s,
8242                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8243                         "---  -------  ------------  ----------  -----------\n");
8244
8245         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8246                 offset = i * MME_QMAN_OFFSET;
8247                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8248                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8249
8250                 /* MME 1 & 3 are slaves, no need to check their QMANs */
8251                 is_slave = i % 2;
8252                 if (!is_slave) {
8253                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8254                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8255                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8256                 }
8257
8258                 is_idle &= is_eng_idle;
8259
8260                 if (mask && !is_eng_idle)
8261                         set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8262                 if (s) {
8263                         if (!is_slave)
8264                                 seq_printf(s, fmt, i,
8265                                         is_eng_idle ? "Y" : "N",
8266                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8267                         else
8268                                 seq_printf(s, mme_slave_fmt, i,
8269                                         is_eng_idle ? "Y" : "N", "-",
8270                                         "-", mme_arch_sts);
8271                 }
8272         }
8273
8274         if (s)
8275                 seq_puts(s, "\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8276                                 "---  -------  ------------  ----------\n");
8277
8278         for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8279                 offset = i * NIC_MACRO_QMAN_OFFSET;
8280                 port = 2 * i;
8281                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8282                         qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8283                         qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8284                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8285                         is_idle &= is_eng_idle;
8286
8287                         if (mask && !is_eng_idle)
8288                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8289                         if (s)
8290                                 seq_printf(s, nic_fmt, port,
8291                                                 is_eng_idle ? "Y" : "N",
8292                                                 qm_glbl_sts0, qm_cgm_sts);
8293                 }
8294
8295                 port = 2 * i + 1;
8296                 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8297                         qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8298                         qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8299                         is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8300                         is_idle &= is_eng_idle;
8301
8302                         if (mask && !is_eng_idle)
8303                                 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8304                         if (s)
8305                                 seq_printf(s, nic_fmt, port,
8306                                                 is_eng_idle ? "Y" : "N",
8307                                                 qm_glbl_sts0, qm_cgm_sts);
8308                 }
8309         }
8310
8311         if (s)
8312                 seq_puts(s, "\n");
8313
8314         hdev->asic_funcs->set_clock_gating(hdev);
8315
8316         mutex_unlock(&gaudi->clk_gate_mutex);
8317
8318         return is_idle;
8319 }
8320
8321 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8322         __acquires(&gaudi->hw_queues_lock)
8323 {
8324         struct gaudi_device *gaudi = hdev->asic_specific;
8325
8326         spin_lock(&gaudi->hw_queues_lock);
8327 }
8328
8329 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8330         __releases(&gaudi->hw_queues_lock)
8331 {
8332         struct gaudi_device *gaudi = hdev->asic_specific;
8333
8334         spin_unlock(&gaudi->hw_queues_lock);
8335 }
8336
8337 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8338 {
8339         return hdev->pdev->device;
8340 }
8341
8342 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8343                                 size_t max_size)
8344 {
8345         struct gaudi_device *gaudi = hdev->asic_specific;
8346
8347         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8348                 return 0;
8349
8350         return hl_fw_get_eeprom_data(hdev, data, max_size);
8351 }
8352
8353 /*
8354  * This function should be used only during initialization and/or after reset,
8355  * when there are no active users.
8356  */
8357 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
8358                                 u32 tpc_id)
8359 {
8360         struct gaudi_device *gaudi = hdev->asic_specific;
8361         u64 kernel_timeout;
8362         u32 status, offset;
8363         int rc;
8364
8365         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8366
8367         if (hdev->pldm)
8368                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8369         else
8370                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8371
8372         mutex_lock(&gaudi->clk_gate_mutex);
8373
8374         hdev->asic_funcs->disable_clock_gating(hdev);
8375
8376         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8377                         lower_32_bits(tpc_kernel));
8378         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8379                         upper_32_bits(tpc_kernel));
8380
8381         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8382                         lower_32_bits(tpc_kernel));
8383         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8384                         upper_32_bits(tpc_kernel));
8385         /* set a valid LUT pointer, content is of no significance */
8386         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8387                         lower_32_bits(tpc_kernel));
8388         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8389                         upper_32_bits(tpc_kernel));
8390
8391         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8392                         lower_32_bits(CFG_BASE +
8393                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8394
8395         WREG32(mmTPC0_CFG_TPC_CMD + offset,
8396                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8397                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8398         /* wait a bit for the engine to start executing */
8399         usleep_range(1000, 1500);
8400
8401         /* wait until engine has finished executing */
8402         rc = hl_poll_timeout(
8403                 hdev,
8404                 mmTPC0_CFG_STATUS + offset,
8405                 status,
8406                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8407                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8408                 1000,
8409                 kernel_timeout);
8410
8411         if (rc) {
8412                 dev_err(hdev->dev,
8413                         "Timeout while waiting for TPC%d icache prefetch\n",
8414                         tpc_id);
8415                 hdev->asic_funcs->set_clock_gating(hdev);
8416                 mutex_unlock(&gaudi->clk_gate_mutex);
8417                 return -EIO;
8418         }
8419
8420         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8421                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8422
8423         /* wait a bit for the engine to start executing */
8424         usleep_range(1000, 1500);
8425
8426         /* wait until engine has finished executing */
8427         rc = hl_poll_timeout(
8428                 hdev,
8429                 mmTPC0_CFG_STATUS + offset,
8430                 status,
8431                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8432                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8433                 1000,
8434                 kernel_timeout);
8435
8436         if (rc) {
8437                 dev_err(hdev->dev,
8438                         "Timeout while waiting for TPC%d vector pipe\n",
8439                         tpc_id);
8440                 hdev->asic_funcs->set_clock_gating(hdev);
8441                 mutex_unlock(&gaudi->clk_gate_mutex);
8442                 return -EIO;
8443         }
8444
8445         rc = hl_poll_timeout(
8446                 hdev,
8447                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8448                 status,
8449                 (status == 0),
8450                 1000,
8451                 kernel_timeout);
8452
8453         hdev->asic_funcs->set_clock_gating(hdev);
8454         mutex_unlock(&gaudi->clk_gate_mutex);
8455
8456         if (rc) {
8457                 dev_err(hdev->dev,
8458                         "Timeout while waiting for TPC%d kernel to execute\n",
8459                         tpc_id);
8460                 return -EIO;
8461         }
8462
8463         return 0;
8464 }
8465
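/*
 * Allocate a host-resident pool of internal command buffers (sized for the
 * driver's collective signal/wait packets) and map it into the context's
 * device VA space.
 */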
8466 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8467                 struct hl_ctx *ctx)
8468 {
8469         struct gaudi_device *gaudi = hdev->asic_specific;
8470         int min_alloc_order, rc, collective_cb_size;
8471
8472         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8473                 return 0;
8474
8475         hdev->internal_cb_pool_virt_addr =
8476                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
8477                                         HOST_SPACE_INTERNAL_CB_SZ,
8478                                         &hdev->internal_cb_pool_dma_addr,
8479                                         GFP_KERNEL | __GFP_ZERO);
8480
8481         if (!hdev->internal_cb_pool_virt_addr)
8482                 return -ENOMEM;
8483
8484         collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8485                         sizeof(struct packet_fence);
8486         min_alloc_order = ilog2(collective_cb_size);
8487
8488         hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8489         if (!hdev->internal_cb_pool) {
8490                 dev_err(hdev->dev,
8491                         "Failed to create internal CB pool\n");
8492                 rc = -ENOMEM;
8493                 goto free_internal_cb_pool;
8494         }
8495
8496         rc = gen_pool_add(hdev->internal_cb_pool,
8497                                 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8498                                 HOST_SPACE_INTERNAL_CB_SZ, -1);
8499         if (rc) {
8500                 dev_err(hdev->dev,
8501                         "Failed to add memory to internal CB pool\n");
8502                 rc = -EFAULT;
8503                 goto destroy_internal_cb_pool;
8504         }
8505
8506         hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8507                         HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8508                         HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8509
8510         if (!hdev->internal_cb_va_base) {
8511                 rc = -ENOMEM;
8512                 goto destroy_internal_cb_pool;
8513         }
8514
8515         mutex_lock(&ctx->mmu_lock);
8516         rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8517                         hdev->internal_cb_pool_dma_addr,
8518                         HOST_SPACE_INTERNAL_CB_SZ);
8519
8520         hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
8521         mutex_unlock(&ctx->mmu_lock);
8522
8523         if (rc)
8524                 goto unreserve_internal_cb_pool;
8525
8526         return 0;
8527
8528 unreserve_internal_cb_pool:
8529         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8530                         HOST_SPACE_INTERNAL_CB_SZ);
8531 destroy_internal_cb_pool:
8532         gen_pool_destroy(hdev->internal_cb_pool);
8533 free_internal_cb_pool:
8534         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8535                         HOST_SPACE_INTERNAL_CB_SZ,
8536                         hdev->internal_cb_pool_virt_addr,
8537                         hdev->internal_cb_pool_dma_addr);
8538
8539         return rc;
8540 }
8541
8542 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8543                 struct hl_ctx *ctx)
8544 {
8545         struct gaudi_device *gaudi = hdev->asic_specific;
8546
8547         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8548                 return;
8549
8550         mutex_lock(&ctx->mmu_lock);
8551         hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8552                         HOST_SPACE_INTERNAL_CB_SZ);
8553         hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8554                         HOST_SPACE_INTERNAL_CB_SZ);
8555         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
8556         mutex_unlock(&ctx->mmu_lock);
8557
8558         gen_pool_destroy(hdev->internal_cb_pool);
8559
8560         hdev->asic_funcs->asic_dma_free_coherent(hdev,
8561                         HOST_SPACE_INTERNAL_CB_SZ,
8562                         hdev->internal_cb_pool_virt_addr,
8563                         hdev->internal_cb_pool_dma_addr);
8564 }
8565
8566 static int gaudi_ctx_init(struct hl_ctx *ctx)
8567 {
8568         if (ctx->asid == HL_KERNEL_ASID_ID)
8569                 return 0;
8570
8571         gaudi_mmu_prepare(ctx->hdev, ctx->asid);
8572         return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8573 }
8574
8575 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8576 {
8577         if (ctx->asid == HL_KERNEL_ASID_ID)
8578                 return;
8579
8580         gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8581 }
8582
8583 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8584 {
8585         return gaudi_cq_assignment[cq_idx];
8586 }
8587
8588 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8589 {
8590         return sizeof(struct packet_msg_short) +
8591                         sizeof(struct packet_msg_prot) * 2;
8592 }
8593
8594 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8595 {
8596         return sizeof(struct packet_msg_short) * 4 +
8597                         sizeof(struct packet_fence) +
8598                         sizeof(struct packet_msg_prot) * 2;
8599 }
8600
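/*
 * Append a MSG_SHORT packet to the CB that increments the given sync object
 * by 1 (ADD mode). Returns the updated CB size.
 */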
8601 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8602                                 u32 size, bool eb)
8603 {
8604         struct hl_cb *cb = (struct hl_cb *) data;
8605         struct packet_msg_short *pkt;
8606         u32 value, ctl, pkt_size = sizeof(*pkt);
8607
8608         pkt = cb->kernel_address + size;
8609         memset(pkt, 0, pkt_size);
8610
8611         /* Inc by 1, Mode ADD */
8612         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8613         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8614
8615         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8616         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8617         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8618         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8619         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8620         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8621         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8622
8623         pkt->value = cpu_to_le32(value);
8624         pkt->ctl = cpu_to_le32(ctl);
8625
8626         return size + pkt_size;
8627 }
8628
8629 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8630                                         u16 addr)
8631 {
8632         u32 ctl, pkt_size = sizeof(*pkt);
8633
8634         memset(pkt, 0, pkt_size);
8635
8636         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8637         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8638         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8639         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8640         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8641         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8642
8643         pkt->value = cpu_to_le32(value);
8644         pkt->ctl = cpu_to_le32(ctl);
8645
8646         return pkt_size;
8647 }
8648
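/*
 * Build the MSG_SHORT packet that arms a sync manager monitor: bind it to
 * the sync object group of sob_base with the given mask, triggering once
 * the value is greater than or equal to sob_val.
 */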
8649 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8650                 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8651                 u16 sob_val, u16 mon_id)
8652 {
8653         u64 monitor_base;
8654         u32 ctl, value, pkt_size = sizeof(*pkt);
8655         u16 msg_addr_offset;
8656         u8 mask;
8657
8658         if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8659                 dev_err(hdev->dev,
8660                         "sob_base %u (mask %#x) is not valid\n",
8661                         sob_base, sob_mask);
8662                 return 0;
8663         }
8664
8665         /*
8666          * monitor_base should be the content of the base0 address registers,
8667          * so it will be added to the msg short offsets
8668          */
8669         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8670
8671         msg_addr_offset =
8672                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8673                                 monitor_base;
8674
8675         memset(pkt, 0, pkt_size);
8676
8677         /* Monitor config packet: bind the monitor to a sync object */
8678         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8679         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8680         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8681                         0); /* GREATER OR EQUAL */
8682         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8683
8684         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8685         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8686         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8687         ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8688         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8689         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8690         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8691
8692         pkt->value = cpu_to_le32(value);
8693         pkt->ctl = cpu_to_le32(ctl);
8694
8695         return pkt_size;
8696 }
8697
8698 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8699 {
8700         u32 ctl, cfg, pkt_size = sizeof(*pkt);
8701
8702         memset(pkt, 0, pkt_size);
8703
8704         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8705         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8706         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8707
8708         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8709         ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8710         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8711         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8712
8713         pkt->cfg = cpu_to_le32(cfg);
8714         pkt->ctl = cpu_to_le32(ctl);
8715
8716         return pkt_size;
8717 }
8718
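/*
 * Return the CFG-space address of the CP_FENCE2_RDATA register of the given
 * queue, which the armed monitor's payload will write to.
 */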
8719 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8720 {
8721         u32 offset, nic_index;
8722
8723         switch (queue_id) {
8724         case GAUDI_QUEUE_ID_DMA_0_0:
8725                 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8726                 break;
8727         case GAUDI_QUEUE_ID_DMA_0_1:
8728                 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8729                 break;
8730         case GAUDI_QUEUE_ID_DMA_0_2:
8731                 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8732                 break;
8733         case GAUDI_QUEUE_ID_DMA_0_3:
8734                 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8735                 break;
8736         case GAUDI_QUEUE_ID_DMA_1_0:
8737                 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8738                 break;
8739         case GAUDI_QUEUE_ID_DMA_1_1:
8740                 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8741                 break;
8742         case GAUDI_QUEUE_ID_DMA_1_2:
8743                 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8744                 break;
8745         case GAUDI_QUEUE_ID_DMA_1_3:
8746                 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8747                 break;
8748         case GAUDI_QUEUE_ID_DMA_5_0:
8749                 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8750                 break;
8751         case GAUDI_QUEUE_ID_DMA_5_1:
8752                 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8753                 break;
8754         case GAUDI_QUEUE_ID_DMA_5_2:
8755                 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8756                 break;
8757         case GAUDI_QUEUE_ID_DMA_5_3:
8758                 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8759                 break;
8760         case GAUDI_QUEUE_ID_TPC_7_0:
8761                 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8762                 break;
8763         case GAUDI_QUEUE_ID_TPC_7_1:
8764                 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8765                 break;
8766         case GAUDI_QUEUE_ID_TPC_7_2:
8767                 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8768                 break;
8769         case GAUDI_QUEUE_ID_TPC_7_3:
8770                 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8771                 break;
8772         case GAUDI_QUEUE_ID_NIC_0_0:
8773         case GAUDI_QUEUE_ID_NIC_1_0:
8774         case GAUDI_QUEUE_ID_NIC_2_0:
8775         case GAUDI_QUEUE_ID_NIC_3_0:
8776         case GAUDI_QUEUE_ID_NIC_4_0:
8777         case GAUDI_QUEUE_ID_NIC_5_0:
8778         case GAUDI_QUEUE_ID_NIC_6_0:
8779         case GAUDI_QUEUE_ID_NIC_7_0:
8780         case GAUDI_QUEUE_ID_NIC_8_0:
8781         case GAUDI_QUEUE_ID_NIC_9_0:
8782                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8783                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8784                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8785                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8786                 break;
8787         case GAUDI_QUEUE_ID_NIC_0_1:
8788         case GAUDI_QUEUE_ID_NIC_1_1:
8789         case GAUDI_QUEUE_ID_NIC_2_1:
8790         case GAUDI_QUEUE_ID_NIC_3_1:
8791         case GAUDI_QUEUE_ID_NIC_4_1:
8792         case GAUDI_QUEUE_ID_NIC_5_1:
8793         case GAUDI_QUEUE_ID_NIC_6_1:
8794         case GAUDI_QUEUE_ID_NIC_7_1:
8795         case GAUDI_QUEUE_ID_NIC_8_1:
8796         case GAUDI_QUEUE_ID_NIC_9_1:
8797                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8798                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8799                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8800                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8801                 break;
8802         case GAUDI_QUEUE_ID_NIC_0_2:
8803         case GAUDI_QUEUE_ID_NIC_1_2:
8804         case GAUDI_QUEUE_ID_NIC_2_2:
8805         case GAUDI_QUEUE_ID_NIC_3_2:
8806         case GAUDI_QUEUE_ID_NIC_4_2:
8807         case GAUDI_QUEUE_ID_NIC_5_2:
8808         case GAUDI_QUEUE_ID_NIC_6_2:
8809         case GAUDI_QUEUE_ID_NIC_7_2:
8810         case GAUDI_QUEUE_ID_NIC_8_2:
8811         case GAUDI_QUEUE_ID_NIC_9_2:
8812                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8813                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8814                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8815                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8816                 break;
8817         case GAUDI_QUEUE_ID_NIC_0_3:
8818         case GAUDI_QUEUE_ID_NIC_1_3:
8819         case GAUDI_QUEUE_ID_NIC_2_3:
8820         case GAUDI_QUEUE_ID_NIC_3_3:
8821         case GAUDI_QUEUE_ID_NIC_4_3:
8822         case GAUDI_QUEUE_ID_NIC_5_3:
8823         case GAUDI_QUEUE_ID_NIC_6_3:
8824         case GAUDI_QUEUE_ID_NIC_7_3:
8825         case GAUDI_QUEUE_ID_NIC_8_3:
8826         case GAUDI_QUEUE_ID_NIC_9_3:
8827                 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8828                 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8829                                 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8830                                 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8831                 break;
8832         default:
8833                 return -EINVAL;
8834         }
8835
8836         *addr = CFG_BASE + offset;
8837
8838         return 0;
8839 }
8840
8841 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8842 {
8843         u64 monitor_base;
8844         u32 size = 0;
8845         u16 msg_addr_offset;
8846
8847         /*
8848          * monitor_base should be the content of the base0 address registers,
8849          * so it will be added to the msg short offsets
8850          */
8851         monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8852
8853         /* First monitor config packet: low address of the sync */
8854         msg_addr_offset =
8855                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8856                                 monitor_base;
8857
8858         size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8859                                         msg_addr_offset);
8860
8861         /* Second monitor config packet: high address of the sync */
8862         msg_addr_offset =
8863                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8864                                 monitor_base;
8865
8866         size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8867                                         msg_addr_offset);
8868
8869         /*
8870          * Third monitor config packet: the payload, i.e. what to write when the
8871          * sync triggers
8872          */
8873         msg_addr_offset =
8874                 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8875                                 monitor_base;
8876
8877         size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8878
8879         return size;
8880 }
8881
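/*
 * Build a wait CB: configure a monitor to write the fence register of the
 * target queue once the sync object condition is met, then add a FENCE
 * packet that blocks the queue until that write arrives.
 */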
8882 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8883                                 struct hl_gen_wait_properties *prop)
8884 {
8885         struct hl_cb *cb = (struct hl_cb *) prop->data;
8886         void *buf = cb->kernel_address;
8887         u64 fence_addr = 0;
8888         u32 size = prop->size;
8889
8890         if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8891                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8892                                 prop->q_idx);
8893                 return 0;
8894         }
8895
8896         size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8897         size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8898                         prop->sob_mask, prop->sob_val, prop->mon_id);
8899         size += gaudi_add_fence_pkt(buf + size);
8900
8901         return size;
8902 }
8903
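/*
 * Reset a hardware sync object to 0 via a register memset scheduled on its
 * queue, and reinitialize its reference count.
 */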
8904 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8905 {
8906         struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8907         int rc;
8908
8909         dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8910                 hw_sob->sob_id);
8911
8912         rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
8913                         CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8914                         hw_sob->sob_id * 4, 1, 0);
8915         if (rc)
8916                 dev_err(hdev->dev, "failed resetting sob %u\n", hw_sob->sob_id);
8917
8918         kref_init(&hw_sob->kref);
8919 }
8920
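/*
 * Select the DMA mask (64-bit vs. 48-bit) according to the magic value the
 * firmware leaves in the non-reset flops register on POWER9 hosts.
 */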
8921 static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
8922 {
8923         if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
8924                                                         HL_POWER9_HOST_MAGIC) {
8925                 hdev->power9_64bit_dma_enable = 1;
8926                 hdev->dma_mask = 64;
8927         } else {
8928                 hdev->power9_64bit_dma_enable = 0;
8929                 hdev->dma_mask = 48;
8930         }
8931 }
8932
8933 static u64 gaudi_get_device_time(struct hl_device *hdev)
8934 {
8935         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8936
8937         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8938 }
8939
8940 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8941                                 u32 *block_size, u32 *block_id)
8942 {
8943         return -EPERM;
8944 }
8945
8946 static int gaudi_block_mmap(struct hl_device *hdev,
8947                                 struct vm_area_struct *vma,
8948                                 u32 block_id, u32 block_size)
8949 {
8950         return -EPERM;
8951 }
8952
8953 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8954 {
8955         struct cpu_dyn_regs *dyn_regs =
8956                         &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8957         u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8958                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8959                         le32_to_cpu(dyn_regs->gic_host_ints_irq);
8960
8961         WREG32(irq_handler_offset,
8962                 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8963 }
8964
8965 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8966 {
8967         switch (pll_idx) {
8968         case HL_GAUDI_CPU_PLL: return CPU_PLL;
8969         case HL_GAUDI_PCI_PLL: return PCI_PLL;
8970         case HL_GAUDI_NIC_PLL: return NIC_PLL;
8971         case HL_GAUDI_DMA_PLL: return DMA_PLL;
8972         case HL_GAUDI_MESH_PLL: return MESH_PLL;
8973         case HL_GAUDI_MME_PLL: return MME_PLL;
8974         case HL_GAUDI_TPC_PLL: return TPC_PLL;
8975         case HL_GAUDI_IF_PLL: return IF_PLL;
8976         case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8977         case HL_GAUDI_HBM_PLL: return HBM_PLL;
8978         default: return -EINVAL;
8979         }
8980 }
8981
8982 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8983                                 struct hl_sync_to_engine_map *map)
8984 {
8985         /* Not implemented */
8986         return 0;
8987 }
8988
8989 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8990 {
8991         /* Not implemented */
8992         return 0;
8993 }
8994
8995 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8996                                 struct hl_device *hdev,
8997                                 struct hl_mon_state_dump *mon)
8998 {
8999         /* Not implemented */
9000         return 0;
9001 }
9002
9003
9004 static int gaudi_print_fences_single_engine(
9005         struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
9006         enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
9007         size_t *size, size_t *offset)
9008 {
9009         /* Not implemented */
9010         return 0;
9011 }
9012
9013
9014 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9015         .monitor_valid = gaudi_monitor_valid,
9016         .print_single_monitor = gaudi_print_single_monitor,
9017         .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9018         .print_fences_single_engine = gaudi_print_fences_single_engine,
9019 };
9020
9021 static void gaudi_state_dump_init(struct hl_device *hdev)
9022 {
9023         /* Not implemented */
9024         hdev->state_dump_specs.props = gaudi_state_dump_specs_props;
9025         hdev->state_dump_specs.funcs = gaudi_state_dump_funcs;
9026 }
9027
9028 static const struct hl_asic_funcs gaudi_funcs = {
9029         .early_init = gaudi_early_init,
9030         .early_fini = gaudi_early_fini,
9031         .late_init = gaudi_late_init,
9032         .late_fini = gaudi_late_fini,
9033         .sw_init = gaudi_sw_init,
9034         .sw_fini = gaudi_sw_fini,
9035         .hw_init = gaudi_hw_init,
9036         .hw_fini = gaudi_hw_fini,
9037         .halt_engines = gaudi_halt_engines,
9038         .suspend = gaudi_suspend,
9039         .resume = gaudi_resume,
9040         .cb_mmap = gaudi_cb_mmap,
9041         .ring_doorbell = gaudi_ring_doorbell,
9042         .pqe_write = gaudi_pqe_write,
9043         .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9044         .asic_dma_free_coherent = gaudi_dma_free_coherent,
9045         .scrub_device_mem = gaudi_scrub_device_mem,
9046         .get_int_queue_base = gaudi_get_int_queue_base,
9047         .test_queues = gaudi_test_queues,
9048         .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9049         .asic_dma_pool_free = gaudi_dma_pool_free,
9050         .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9051         .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9052         .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
9053         .cs_parser = gaudi_cs_parser,
9054         .asic_dma_map_sg = gaudi_dma_map_sg,
9055         .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
9056         .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9057         .update_eq_ci = gaudi_update_eq_ci,
9058         .context_switch = gaudi_context_switch,
9059         .restore_phase_topology = gaudi_restore_phase_topology,
9060         .debugfs_read32 = gaudi_debugfs_read32,
9061         .debugfs_write32 = gaudi_debugfs_write32,
9062         .debugfs_read64 = gaudi_debugfs_read64,
9063         .debugfs_write64 = gaudi_debugfs_write64,
9064         .debugfs_read_dma = gaudi_debugfs_read_dma,
9065         .add_device_attr = gaudi_add_device_attr,
9066         .handle_eqe = gaudi_handle_eqe,
9067         .set_pll_profile = gaudi_set_pll_profile,
9068         .get_events_stat = gaudi_get_events_stat,
9069         .read_pte = gaudi_read_pte,
9070         .write_pte = gaudi_write_pte,
9071         .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9072         .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9073         .send_heartbeat = gaudi_send_heartbeat,
9074         .set_clock_gating = gaudi_set_clock_gating,
9075         .disable_clock_gating = gaudi_disable_clock_gating,
9076         .debug_coresight = gaudi_debug_coresight,
9077         .is_device_idle = gaudi_is_device_idle,
9078         .soft_reset_late_init = gaudi_soft_reset_late_init,
9079         .hw_queues_lock = gaudi_hw_queues_lock,
9080         .hw_queues_unlock = gaudi_hw_queues_unlock,
9081         .get_pci_id = gaudi_get_pci_id,
9082         .get_eeprom_data = gaudi_get_eeprom_data,
9083         .send_cpu_message = gaudi_send_cpu_message,
9084         .pci_bars_map = gaudi_pci_bars_map,
9085         .init_iatu = gaudi_init_iatu,
9086         .rreg = hl_rreg,
9087         .wreg = hl_wreg,
9088         .halt_coresight = gaudi_halt_coresight,
9089         .ctx_init = gaudi_ctx_init,
9090         .ctx_fini = gaudi_ctx_fini,
9091         .get_clk_rate = gaudi_get_clk_rate,
9092         .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9093         .load_firmware_to_device = gaudi_load_firmware_to_device,
9094         .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9095         .get_signal_cb_size = gaudi_get_signal_cb_size,
9096         .get_wait_cb_size = gaudi_get_wait_cb_size,
9097         .gen_signal_cb = gaudi_gen_signal_cb,
9098         .gen_wait_cb = gaudi_gen_wait_cb,
9099         .reset_sob = gaudi_reset_sob,
9100         .reset_sob_group = gaudi_reset_sob_group,
9101         .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
9102         .get_device_time = gaudi_get_device_time,
9103         .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9104         .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9105         .scramble_addr = hl_mmu_scramble_addr,
9106         .descramble_addr = hl_mmu_descramble_addr,
9107         .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9108         .get_hw_block_id = gaudi_get_hw_block_id,
9109         .hw_block_mmap = gaudi_block_mmap,
9110         .enable_events_from_fw = gaudi_enable_events_from_fw,
9111         .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9112         .init_firmware_loader = gaudi_init_firmware_loader,
9113         .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9114         .state_dump_init = gaudi_state_dump_init
9115 };
9116
9117 /**
9118  * gaudi_set_asic_funcs - set GAUDI function pointers
9119  *
9120  * @hdev: pointer to hl_device structure
9121  *
9122  */
9123 void gaudi_set_asic_funcs(struct hl_device *hdev)
9124 {
9125         hdev->asic_funcs = &gaudi_funcs;
9126 }