habanalabs: Save context in a command buffer object
drivers/misc/habanalabs/gaudi/gaudi.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2020 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/genalloc.h>
21 #include <linux/io-64-nonatomic-lo-hi.h>
22 #include <linux/iommu.h>
23 #include <linux/seq_file.h>
24
25 /*
26  * Gaudi security scheme:
27  *
28  * 1. Host is protected by:
29  *        - Range registers
30  *        - MMU
31  *
32  * 2. DDR is protected by:
33  *        - Range registers (protect the first 512MB)
34  *
35  * 3. Configuration is protected by:
36  *        - Range registers
37  *        - Protection bits
38  *
39  * MMU is always enabled.
40  *
 41  * QMAN DMA channels 0,1,5 (PCI DMA):
42  *     - DMA is not secured.
43  *     - PQ and CQ are secured.
 44  * CP is secured: The driver needs to parse the CB, but WREG should be
 45  *                      allowed because of TDMA (tensor DMA). Hence, WREG is
 46  *                      never secured.
47  *
48  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
49  * channel 0 to be secured, execute the DMA and change it back to not secured.
50  * Currently, the driver doesn't use the DMA while there are compute jobs
51  * running.
52  *
53  * The current use cases for the driver to use the DMA are:
54  *     - Clear SRAM on context switch (happens on context switch when device is
55  *       idle)
56  *     - MMU page tables area clear (happens on init)
57  *
58  * QMAN DMA 2-4,6,7, TPC, MME, NIC:
59  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
60  * CQ, CP and the engine are not secured
61  *
62  */
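
/*
 * Practical consequence (added note, not in the original source): only the
 * external PCI DMA queues (DMA 0, 1 and 5) require the driver to parse and
 * patch user command buffers. For the internal queues, the driver only
 * writes the secured, host-resident PQ entries; the engines then fetch the
 * user CBs without driver inspection.
 */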
63
64 #define GAUDI_BOOT_FIT_FILE     "habanalabs/gaudi/gaudi-boot-fit.itb"
65 #define GAUDI_LINUX_FW_FILE     "habanalabs/gaudi/gaudi-fit.itb"
66 #define GAUDI_TPC_FW_FILE       "habanalabs/gaudi/gaudi_tpc.bin"
67
68 #define GAUDI_DMA_POOL_BLK_SIZE         0x100 /* 256 bytes */
69
70 #define GAUDI_RESET_TIMEOUT_MSEC        1000            /* 1000ms */
71 #define GAUDI_RESET_WAIT_MSEC           1               /* 1ms */
72 #define GAUDI_CPU_RESET_WAIT_MSEC       200             /* 200ms */
73 #define GAUDI_TEST_QUEUE_WAIT_USEC      100000          /* 100ms */
74
75 #define GAUDI_PLDM_RESET_WAIT_MSEC      1000            /* 1s */
76 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC  20000           /* 20s */
77 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000         /* 1s */
78 #define GAUDI_PLDM_MMU_TIMEOUT_USEC     (MMU_CONFIG_TIMEOUT_USEC * 100)
79 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC   (HL_DEVICE_TIMEOUT_USEC * 30)
80 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
81 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000         /* 1s */
82 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC   4000000         /* 4s */
83
84 #define GAUDI_QMAN0_FENCE_VAL           0x72E91AB9
85
86 #define GAUDI_MAX_STRING_LEN            20
87
88 #define GAUDI_CB_POOL_CB_CNT            512
89 #define GAUDI_CB_POOL_CB_SIZE           0x20000 /* 128KB */
90
91 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT   3
92
93 #define GAUDI_NUM_OF_TPC_INTR_CAUSE     20
94
95 #define GAUDI_NUM_OF_QM_ERR_CAUSE       16
96
97 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE   3
98
99 #define GAUDI_ARB_WDT_TIMEOUT           0x1000000
100
101 #define GAUDI_CLK_GATE_DEBUGFS_MASK     (\
102                 BIT(GAUDI_ENGINE_ID_MME_0) |\
103                 BIT(GAUDI_ENGINE_ID_MME_2) |\
104                 GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0))
105
106 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
107                 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
108                 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
109                 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
110                 "gaudi cpu eq"
111 };
112
113 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
114         [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
115         [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
116         [GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5,
117         [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
118         [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
119         [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
120         [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6,
121         [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7
122 };
123
124 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
125         [0] = GAUDI_QUEUE_ID_DMA_0_0,
126         [1] = GAUDI_QUEUE_ID_DMA_0_1,
127         [2] = GAUDI_QUEUE_ID_DMA_0_2,
128         [3] = GAUDI_QUEUE_ID_DMA_0_3,
129         [4] = GAUDI_QUEUE_ID_DMA_1_0,
130         [5] = GAUDI_QUEUE_ID_DMA_1_1,
131         [6] = GAUDI_QUEUE_ID_DMA_1_2,
132         [7] = GAUDI_QUEUE_ID_DMA_1_3,
133         [8] = GAUDI_QUEUE_ID_DMA_5_0,
134         [9] = GAUDI_QUEUE_ID_DMA_5_1,
135         [10] = GAUDI_QUEUE_ID_DMA_5_2,
136         [11] = GAUDI_QUEUE_ID_DMA_5_3
137 };
138
139 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
140         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
141         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
142         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
143         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
144         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
145         [PACKET_REPEAT]         = sizeof(struct packet_repeat),
146         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
147         [PACKET_FENCE]          = sizeof(struct packet_fence),
148         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
149         [PACKET_NOP]            = sizeof(struct packet_nop),
150         [PACKET_STOP]           = sizeof(struct packet_stop),
151         [PACKET_ARB_POINT]      = sizeof(struct packet_arb_point),
152         [PACKET_WAIT]           = sizeof(struct packet_wait),
153         [PACKET_LOAD_AND_EXE]   = sizeof(struct packet_load_and_exe)
154 };
155
156 static inline bool validate_packet_id(enum packet_id id)
157 {
158         switch (id) {
159         case PACKET_WREG_32:
160         case PACKET_WREG_BULK:
161         case PACKET_MSG_LONG:
162         case PACKET_MSG_SHORT:
163         case PACKET_CP_DMA:
164         case PACKET_REPEAT:
165         case PACKET_MSG_PROT:
166         case PACKET_FENCE:
167         case PACKET_LIN_DMA:
168         case PACKET_NOP:
169         case PACKET_STOP:
170         case PACKET_ARB_POINT:
171         case PACKET_WAIT:
172         case PACKET_LOAD_AND_EXE:
173                 return true;
174         default:
175                 return false;
176         }
177 }
178
179 static const char * const
180 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
181         "tpc_address_exceed_slm",
182         "tpc_div_by_0",
183         "tpc_spu_mac_overflow",
184         "tpc_spu_addsub_overflow",
185         "tpc_spu_abs_overflow",
186         "tpc_spu_fp_dst_nan_inf",
187         "tpc_spu_fp_dst_denorm",
188         "tpc_vpu_mac_overflow",
189         "tpc_vpu_addsub_overflow",
190         "tpc_vpu_abs_overflow",
191         "tpc_vpu_fp_dst_nan_inf",
192         "tpc_vpu_fp_dst_denorm",
193         "tpc_assertions",
194         "tpc_illegal_instruction",
195         "tpc_pc_wrap_around",
196         "tpc_qm_sw_err",
197         "tpc_hbw_rresp_err",
198         "tpc_hbw_bresp_err",
199         "tpc_lbw_rresp_err",
200         "tpc_lbw_bresp_err"
201 };
202
203 static const char * const
204 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
205         "PQ AXI HBW error",
206         "CQ AXI HBW error",
207         "CP AXI HBW error",
208         "CP error due to undefined OPCODE",
209         "CP encountered STOP OPCODE",
210         "CP AXI LBW error",
211         "CP WRREG32 or WRBULK returned error",
212         "N/A",
213         "FENCE 0 inc over max value and clipped",
214         "FENCE 1 inc over max value and clipped",
215         "FENCE 2 inc over max value and clipped",
216         "FENCE 3 inc over max value and clipped",
217         "FENCE 0 dec under min value and clipped",
218         "FENCE 1 dec under min value and clipped",
219         "FENCE 2 dec under min value and clipped",
220         "FENCE 3 dec under min value and clipped"
221 };
222
223 static const char * const
224 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
225         "Choice push while full error",
226         "Choice Q watchdog error",
227         "MSG AXI LBW returned with error"
228 };
229
230 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
231         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
232         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
233         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
234         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
235         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
236         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
237         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
238         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
239         QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
240         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
241         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
242         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
243         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
244         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
245         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
246         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
247         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
248         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
249         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
250         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
251         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
252         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
253         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
254         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
255         QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
256         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
257         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
258         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
259         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
260         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
261         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
262         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
263         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
264         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
265         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
266         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
267         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
268         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
269         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
270         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
271         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
272         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
273         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
274         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
275         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
276         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
277         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
278         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
279         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
280         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
281         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
282         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
283         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
284         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
285         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
286         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
287         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
288         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
289         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
290         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
291         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
292         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
293         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
294         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
295         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
296         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
297         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
298         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
299         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
300         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
301         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
302         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
303         QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
304         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_0 */
305         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_1 */
306         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_2 */
307         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_0_3 */
308         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_0 */
309         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_1 */
310         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_2 */
311         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_1_3 */
312         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_0 */
313         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_1 */
314         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_2 */
315         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_2_3 */
316         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_0 */
317         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_1 */
318         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_2 */
319         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_3_3 */
320         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_0 */
321         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_1 */
322         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_2 */
323         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_4_3 */
324         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_0 */
325         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_1 */
326         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_2 */
327         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_5_3 */
328         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_0 */
329         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_1 */
330         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_2 */
331         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_6_3 */
332         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_0 */
333         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_1 */
334         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_2 */
335         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_7_3 */
336         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_0 */
337         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_1 */
338         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_2 */
339         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_8_3 */
340         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_0 */
341         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_1 */
342         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_2 */
343         QUEUE_TYPE_NA,  /* GAUDI_QUEUE_ID_NIC_9_3 */
344 };
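
/*
 * Note (editor's summary, not in the original source): QUEUE_TYPE_EXT marks
 * external queues that the driver fully manages (kernel CBs, completion
 * queues in host memory), QUEUE_TYPE_INT marks internal engine queues whose
 * command buffers are consumed directly by the engines, QUEUE_TYPE_CPU is
 * the single driver<->device-CPU (firmware) queue, and QUEUE_TYPE_NA marks
 * queues that are not supported in this version (all NIC queues above).
 */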
345
346 struct ecc_info_extract_params {
347         u64 block_address;
348         u32 num_memories;
349         bool derr;
350         bool disable_clock_gating;
351 };
352
353 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
354                                                                 u64 phys_addr);
355 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
356                                         struct hl_cs_job *job);
357 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
358                                         u32 size, u64 val);
359 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
360                                 u32 tpc_id);
361 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
362 static int gaudi_cpucp_info_get(struct hl_device *hdev);
363 static void gaudi_disable_clock_gating(struct hl_device *hdev);
364 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
365
366 static int gaudi_get_fixed_properties(struct hl_device *hdev)
367 {
368         struct asic_fixed_properties *prop = &hdev->asic_prop;
369         u32 num_sync_stream_queues = 0;
370         int i;
371
372         prop->max_queues = GAUDI_QUEUE_ID_SIZE;
373         prop->hw_queues_props = kcalloc(prop->max_queues,
374                         sizeof(struct hw_queue_properties),
375                         GFP_KERNEL);
376
377         if (!prop->hw_queues_props)
378                 return -ENOMEM;
379
380         for (i = 0 ; i < prop->max_queues ; i++) {
381                 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
382                         prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
383                         prop->hw_queues_props[i].driver_only = 0;
384                         prop->hw_queues_props[i].requires_kernel_cb = 1;
385                         prop->hw_queues_props[i].supports_sync_stream = 1;
386                         num_sync_stream_queues++;
387                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
388                         prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
389                         prop->hw_queues_props[i].driver_only = 1;
390                         prop->hw_queues_props[i].requires_kernel_cb = 0;
391                         prop->hw_queues_props[i].supports_sync_stream = 0;
392                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
393                         prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
394                         prop->hw_queues_props[i].driver_only = 0;
395                         prop->hw_queues_props[i].requires_kernel_cb = 0;
396                 } else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
397                         prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
398                         prop->hw_queues_props[i].driver_only = 0;
399                         prop->hw_queues_props[i].requires_kernel_cb = 0;
400                         prop->hw_queues_props[i].supports_sync_stream = 0;
401                 }
402         }
403
404         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
405         prop->sync_stream_first_sob = 0;
406         prop->sync_stream_first_mon = 0;
407         prop->dram_base_address = DRAM_PHYS_BASE;
408         prop->dram_size = GAUDI_HBM_SIZE_32GB;
409         prop->dram_end_address = prop->dram_base_address +
410                                         prop->dram_size;
411         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
412
413         prop->sram_base_address = SRAM_BASE_ADDR;
414         prop->sram_size = SRAM_SIZE;
415         prop->sram_end_address = prop->sram_base_address +
416                                         prop->sram_size;
417         prop->sram_user_base_address = prop->sram_base_address +
418                                         SRAM_USER_BASE_OFFSET;
419
420         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
421         if (hdev->pldm)
422                 prop->mmu_pgt_size = 0x800000; /* 8MB */
423         else
424                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
425         prop->mmu_pte_size = HL_PTE_SIZE;
426         prop->mmu_hop_table_size = HOP_TABLE_SIZE;
427         prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
428         prop->dram_page_size = PAGE_SIZE_2MB;
429
430         prop->pmmu.hop0_shift = HOP0_SHIFT;
431         prop->pmmu.hop1_shift = HOP1_SHIFT;
432         prop->pmmu.hop2_shift = HOP2_SHIFT;
433         prop->pmmu.hop3_shift = HOP3_SHIFT;
434         prop->pmmu.hop4_shift = HOP4_SHIFT;
435         prop->pmmu.hop0_mask = HOP0_MASK;
436         prop->pmmu.hop1_mask = HOP1_MASK;
437         prop->pmmu.hop2_mask = HOP2_MASK;
438         prop->pmmu.hop3_mask = HOP3_MASK;
439         prop->pmmu.hop4_mask = HOP4_MASK;
440         prop->pmmu.start_addr = VA_HOST_SPACE_START;
441         prop->pmmu.end_addr =
442                         (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
443         prop->pmmu.page_size = PAGE_SIZE_4KB;
444         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
445
446         /* PMMU and HPMMU are the same except of page size */
447         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
448         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
449
450         /* shifts and masks are the same in PMMU and DMMU */
451         memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
452         prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
453         prop->dmmu.end_addr = VA_HOST_SPACE_END;
454         prop->dmmu.page_size = PAGE_SIZE_2MB;
455
456         prop->cfg_size = CFG_SIZE;
457         prop->max_asid = MAX_ASID;
458         prop->num_of_events = GAUDI_EVENT_SIZE;
459         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
460
461         prop->max_power_default = MAX_POWER_DEFAULT_PCI;
462
463         prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
464         prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
465
466         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
467         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
468
469         strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
470                                         CARD_NAME_MAX_LEN);
471
472         prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
473
474         prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
475                         num_sync_stream_queues * HL_RSVD_SOBS;
476         prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
477                         num_sync_stream_queues * HL_RSVD_MONS;
478
479         return 0;
480 }
481
482 static int gaudi_pci_bars_map(struct hl_device *hdev)
483 {
484         static const char * const name[] = {"SRAM", "CFG", "HBM"};
485         bool is_wc[3] = {false, false, true};
486         int rc;
487
488         rc = hl_pci_bars_map(hdev, name, is_wc);
489         if (rc)
490                 return rc;
491
492         hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
493                         (CFG_BASE - SPI_FLASH_BASE_ADDR);
494
495         return 0;
496 }
497
498 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
499 {
500         struct gaudi_device *gaudi = hdev->asic_specific;
501         struct hl_inbound_pci_region pci_region;
502         u64 old_addr = addr;
503         int rc;
504
505         if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
506                 return old_addr;
507
508         /* Inbound Region 2 - Bar 4 - Point to HBM */
509         pci_region.mode = PCI_BAR_MATCH_MODE;
510         pci_region.bar = HBM_BAR_ID;
511         pci_region.addr = addr;
512         rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
513         if (rc)
514                 return U64_MAX;
515
516         if (gaudi) {
517                 old_addr = gaudi->hbm_bar_cur_addr;
518                 gaudi->hbm_bar_cur_addr = addr;
519         }
520
521         return old_addr;
522 }
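
/*
 * Illustrative sketch (editor's addition, not part of the original driver):
 * the save/restore pattern callers are expected to use around
 * gaudi_set_hbm_bar_base() when accessing HBM through the BAR window. The
 * helper name below is hypothetical.
 */
static u32 gaudi_read_hbm_u32_sketch(struct hl_device *hdev, u64 addr)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        u64 bar_base, old_base;
        u32 val;

        /* Align the requested address to the start of a BAR-sized window */
        bar_base = addr & ~(prop->dram_pci_bar_size - 1);

        /* Move the HBM BAR window and remember where it pointed before */
        old_base = gaudi_set_hbm_bar_base(hdev, bar_base);
        if (old_base == U64_MAX)
                return 0;

        /* Read through the HBM BAR at the offset inside the window */
        val = readl(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base));

        /* Restore the previous window so other users are not affected */
        gaudi_set_hbm_bar_base(hdev, old_base);

        return val;
}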
523
524 static int gaudi_init_iatu(struct hl_device *hdev)
525 {
526         struct hl_inbound_pci_region inbound_region;
527         struct hl_outbound_pci_region outbound_region;
528         int rc;
529
530         /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
531         inbound_region.mode = PCI_BAR_MATCH_MODE;
532         inbound_region.bar = SRAM_BAR_ID;
533         inbound_region.addr = SRAM_BASE_ADDR;
534         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
535         if (rc)
536                 goto done;
537
538         /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
539         inbound_region.mode = PCI_BAR_MATCH_MODE;
540         inbound_region.bar = CFG_BAR_ID;
541         inbound_region.addr = SPI_FLASH_BASE_ADDR;
542         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
543         if (rc)
544                 goto done;
545
546         /* Inbound Region 2 - Bar 4 - Point to HBM */
547         inbound_region.mode = PCI_BAR_MATCH_MODE;
548         inbound_region.bar = HBM_BAR_ID;
549         inbound_region.addr = DRAM_PHYS_BASE;
550         rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
551         if (rc)
552                 goto done;
553
554         hdev->asic_funcs->set_dma_mask_from_fw(hdev);
555
556         /* Outbound Region 0 - Point to Host */
557         outbound_region.addr = HOST_PHYS_BASE;
558         outbound_region.size = HOST_PHYS_SIZE;
559         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
560
561 done:
562         return rc;
563 }
564
565 static int gaudi_early_init(struct hl_device *hdev)
566 {
567         struct asic_fixed_properties *prop = &hdev->asic_prop;
568         struct pci_dev *pdev = hdev->pdev;
569         int rc;
570
571         rc = gaudi_get_fixed_properties(hdev);
572         if (rc) {
573                 dev_err(hdev->dev, "Failed to get fixed properties\n");
574                 return rc;
575         }
576
577         /* Check BAR sizes */
578         if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
579                 dev_err(hdev->dev,
580                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
581                         SRAM_BAR_ID,
582                         (unsigned long long) pci_resource_len(pdev,
583                                                         SRAM_BAR_ID),
584                         SRAM_BAR_SIZE);
585                 rc = -ENODEV;
586                 goto free_queue_props;
587         }
588
589         if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
590                 dev_err(hdev->dev,
591                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
592                         CFG_BAR_ID,
593                         (unsigned long long) pci_resource_len(pdev,
594                                                                 CFG_BAR_ID),
595                         CFG_BAR_SIZE);
596                 rc = -ENODEV;
597                 goto free_queue_props;
598         }
599
600         prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
601
602         rc = hl_pci_init(hdev);
603         if (rc)
604                 goto free_queue_props;
605
606         return 0;
607
608 free_queue_props:
609         kfree(hdev->asic_prop.hw_queues_props);
610         return rc;
611 }
612
613 static int gaudi_early_fini(struct hl_device *hdev)
614 {
615         kfree(hdev->asic_prop.hw_queues_props);
616         hl_pci_fini(hdev);
617
618         return 0;
619 }
620
621 /**
622  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
623  *
624  * @hdev: pointer to hl_device structure
625  *
626  */
627 static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
628 {
629         struct asic_fixed_properties *prop = &hdev->asic_prop;
630         u32 trace_freq = 0;
631         u32 pll_clk = 0;
632         u32 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
633         u32 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
634         u32 nr = RREG32(mmPSOC_CPU_PLL_NR);
635         u32 nf = RREG32(mmPSOC_CPU_PLL_NF);
636         u32 od = RREG32(mmPSOC_CPU_PLL_OD);
637
638         if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
639                 if (div_sel == DIV_SEL_REF_CLK)
640                         trace_freq = PLL_REF_CLK;
641                 else
642                         trace_freq = PLL_REF_CLK / (div_fctr + 1);
643         } else if (div_sel == DIV_SEL_PLL_CLK ||
644                                         div_sel == DIV_SEL_DIVIDED_PLL) {
645                 pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1));
646                 if (div_sel == DIV_SEL_PLL_CLK)
647                         trace_freq = pll_clk;
648                 else
649                         trace_freq = pll_clk / (div_fctr + 1);
650         } else {
651                 dev_warn(hdev->dev,
652                         "Received invalid div select value: %d", div_sel);
653         }
654
655         prop->psoc_timestamp_frequency = trace_freq;
656         prop->psoc_pci_pll_nr = nr;
657         prop->psoc_pci_pll_nf = nf;
658         prop->psoc_pci_pll_od = od;
659         prop->psoc_pci_pll_div_factor = div_fctr;
660 }
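
/*
 * Worked example (added note; numbers are illustrative only): assuming a
 * 50 MHz PLL reference clock with nr = 0, nf = 31 and od = 1, the PLL output
 * is 50 * (31 + 1) / ((0 + 1) * (1 + 1)) = 800 MHz. In DIV_SEL_DIVIDED_PLL
 * mode with div_fctr = 1, the reported trace frequency is then
 * 800 / (1 + 1) = 400 MHz.
 */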
661
662 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
663                 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
664 {
665         struct asic_fixed_properties *prop = &hdev->asic_prop;
666         struct packet_lin_dma *init_tpc_mem_pkt;
667         struct hl_cs_job *job;
668         struct hl_cb *cb;
669         u64 dst_addr;
670         u32 cb_size, ctl;
671         u8 tpc_id;
672         int rc;
673
674         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
675         if (!cb)
676                 return -EFAULT;
677
678         init_tpc_mem_pkt = (struct packet_lin_dma *) (uintptr_t)
679                                         cb->kernel_address;
680         cb_size = sizeof(*init_tpc_mem_pkt);
681         memset(init_tpc_mem_pkt, 0, cb_size);
682
683         init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
684
685         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
686         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
687         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
688         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
689
690         init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
691
692         init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
693         dst_addr = (prop->sram_user_base_address &
694                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
695                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
696         init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
697
698         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
699         if (!job) {
700                 dev_err(hdev->dev, "Failed to allocate a new job\n");
701                 rc = -ENOMEM;
702                 goto release_cb;
703         }
704
705         job->id = 0;
706         job->user_cb = cb;
707         job->user_cb->cs_cnt++;
708         job->user_cb_size = cb_size;
709         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
710         job->patched_cb = job->user_cb;
711         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
712
713         hl_debugfs_add_job(hdev, job);
714
715         rc = gaudi_send_job_on_qman0(hdev, job);
716
717         if (rc)
718                 goto free_job;
719
720         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
721                 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
722                 if (rc)
723                         break;
724         }
725
726 free_job:
727         hl_userptr_delete_list(hdev, &job->userptr_list);
728         hl_debugfs_remove_job(hdev, job);
729         kfree(job);
730         cb->cs_cnt--;
731
732 release_cb:
733         hl_cb_put(cb);
734         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
735
736         return rc;
737 }
738
739 /*
740  * gaudi_init_tpc_mem() - Initialize TPC memories.
741  * @hdev: Pointer to hl_device structure.
742  *
743  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
744  *
745  * Return: 0 for success, negative value for error.
746  */
747 static int gaudi_init_tpc_mem(struct hl_device *hdev)
748 {
749         const struct firmware *fw;
750         size_t fw_size;
751         void *cpu_addr;
752         dma_addr_t dma_handle;
753         int rc;
754
755         rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
756         if (rc) {
757                 dev_err(hdev->dev, "Firmware file %s is not found!\n",
758                                 GAUDI_TPC_FW_FILE);
759                 goto out;
760         }
761
762         fw_size = fw->size;
763         cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
764                         &dma_handle, GFP_KERNEL | __GFP_ZERO);
765         if (!cpu_addr) {
766                 dev_err(hdev->dev,
767                         "Failed to allocate %zu of dma memory for TPC kernel\n",
768                         fw_size);
769                 rc = -ENOMEM;
770                 goto out;
771         }
772
773         memcpy(cpu_addr, fw->data, fw_size);
774
775         rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
776
777         hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
778                         dma_handle);
779
780 out:
781         release_firmware(fw);
782         return rc;
783 }
784
785 static int gaudi_late_init(struct hl_device *hdev)
786 {
787         struct gaudi_device *gaudi = hdev->asic_specific;
788         int rc;
789
790         rc = gaudi->cpucp_info_get(hdev);
791         if (rc) {
792                 dev_err(hdev->dev, "Failed to get cpucp info\n");
793                 return rc;
794         }
795
796         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
797         if (rc) {
798                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
799                 return rc;
800         }
801
802         WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
803
804         gaudi_fetch_psoc_frequency(hdev);
805
806         rc = gaudi_mmu_clear_pgt_range(hdev);
807         if (rc) {
808                 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
809                 goto disable_pci_access;
810         }
811
812         rc = gaudi_init_tpc_mem(hdev);
813         if (rc) {
814                 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
815                 goto disable_pci_access;
816         }
817
818         return 0;
819
820 disable_pci_access:
821         hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
822
823         return rc;
824 }
825
826 static void gaudi_late_fini(struct hl_device *hdev)
827 {
828         const struct hwmon_channel_info **channel_info_arr;
829         int i = 0;
830
831         if (!hdev->hl_chip_info->info)
832                 return;
833
834         channel_info_arr = hdev->hl_chip_info->info;
835
836         while (channel_info_arr[i]) {
837                 kfree(channel_info_arr[i]->config);
838                 kfree(channel_info_arr[i]);
839                 i++;
840         }
841
842         kfree(channel_info_arr);
843
844         hdev->hl_chip_info->info = NULL;
845 }
846
847 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
848 {
849         dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
850         void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
851         int i, j, rc = 0;
852
853         /*
 854          * The device CPU works with 40-bit addresses, and bit 39 must be set
 855          * to '1' when accessing the host.
 856          * Bits 49:39 of the full host address are saved for a later
 857          * configuration of the HW that extends the address to 50 bits.
 858          * Because a single HW register holds the extension bits, these bits
 859          * must be identical throughout the entire allocated range.
860          */
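
        /*
         * Illustration (added note): bits 49:39 change value every
         * 1ULL << 39 bytes (512 GiB), so the loop below simply retries the
         * allocation until it gets a block that does not straddle such a
         * boundary, i.e. the start and end addresses share the same MSB
         * value.
         */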
861
862         for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
863                 virt_addr_arr[i] =
864                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
865                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
866                                                 &dma_addr_arr[i],
867                                                 GFP_KERNEL | __GFP_ZERO);
868                 if (!virt_addr_arr[i]) {
869                         rc = -ENOMEM;
870                         goto free_dma_mem_arr;
871                 }
872
873                 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
874                 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
875                                 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
876                         break;
877         }
878
879         if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
880                 dev_err(hdev->dev,
881                         "MSB of CPU accessible DMA memory are not identical in all range\n");
882                 rc = -EFAULT;
883                 goto free_dma_mem_arr;
884         }
885
886         hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
887         hdev->cpu_accessible_dma_address = dma_addr_arr[i];
888         hdev->cpu_pci_msb_addr =
889                 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
890
891         GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
892
893 free_dma_mem_arr:
894         for (j = 0 ; j < i ; j++)
895                 hdev->asic_funcs->asic_dma_free_coherent(hdev,
896                                                 HL_CPU_ACCESSIBLE_MEM_SIZE,
897                                                 virt_addr_arr[j],
898                                                 dma_addr_arr[j]);
899
900         return rc;
901 }
902
903 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
904 {
905         struct gaudi_device *gaudi = hdev->asic_specific;
906         struct gaudi_internal_qman_info *q;
907         u32 i;
908
909         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
910                 q = &gaudi->internal_qmans[i];
911                 if (!q->pq_kernel_addr)
912                         continue;
913                 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
914                                                         q->pq_kernel_addr,
915                                                         q->pq_dma_addr);
916         }
917 }
918
919 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
920 {
921         struct gaudi_device *gaudi = hdev->asic_specific;
922         struct gaudi_internal_qman_info *q;
923         int rc, i;
924
925         for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
926                 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
927                         continue;
928
929                 q = &gaudi->internal_qmans[i];
930
931                 switch (i) {
932                 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3:
933                 case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3:
934                         q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
935                         break;
936                 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
937                         q->pq_size = MME_QMAN_SIZE_IN_BYTES;
938                         break;
939                 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
940                         q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
941                         break;
942                 default:
943                         dev_err(hdev->dev, "Bad internal queue index %d", i);
944                         rc = -EINVAL;
945                         goto free_internal_qmans_pq_mem;
946                 }
947
948                 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
949                                                 hdev, q->pq_size,
950                                                 &q->pq_dma_addr,
951                                                 GFP_KERNEL | __GFP_ZERO);
952                 if (!q->pq_kernel_addr) {
953                         rc = -ENOMEM;
954                         goto free_internal_qmans_pq_mem;
955                 }
956         }
957
958         return 0;
959
960 free_internal_qmans_pq_mem:
961         gaudi_free_internal_qmans_pq_mem(hdev);
962         return rc;
963 }
964
965 static int gaudi_sw_init(struct hl_device *hdev)
966 {
967         struct gaudi_device *gaudi;
968         u32 i, event_id = 0;
969         int rc;
970
971         /* Allocate device structure */
972         gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
973         if (!gaudi)
974                 return -ENOMEM;
975
976         for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
977                 if (gaudi_irq_map_table[i].valid) {
978                         if (event_id == GAUDI_EVENT_SIZE) {
979                                 dev_err(hdev->dev,
980                                         "Event array exceeds the limit of %u events\n",
981                                         GAUDI_EVENT_SIZE);
982                                 rc = -EINVAL;
983                                 goto free_gaudi_device;
984                         }
985
986                         gaudi->events[event_id++] =
987                                         gaudi_irq_map_table[i].fc_id;
988                 }
989         }
990
991         gaudi->cpucp_info_get = gaudi_cpucp_info_get;
992
993         gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
994
995         hdev->asic_specific = gaudi;
996
997         /* Create DMA pool for small allocations */
998         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
999                         &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1000         if (!hdev->dma_pool) {
1001                 dev_err(hdev->dev, "failed to create DMA pool\n");
1002                 rc = -ENOMEM;
1003                 goto free_gaudi_device;
1004         }
1005
1006         rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1007         if (rc)
1008                 goto free_dma_pool;
1009
1010         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1011         if (!hdev->cpu_accessible_dma_pool) {
1012                 dev_err(hdev->dev,
1013                         "Failed to create CPU accessible DMA pool\n");
1014                 rc = -ENOMEM;
1015                 goto free_cpu_dma_mem;
1016         }
1017
1018         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1019                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1020                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1021         if (rc) {
1022                 dev_err(hdev->dev,
1023                         "Failed to add memory to CPU accessible DMA pool\n");
1024                 rc = -EFAULT;
1025                 goto free_cpu_accessible_dma_pool;
1026         }
1027
1028         rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1029         if (rc)
1030                 goto free_cpu_accessible_dma_pool;
1031
1032         spin_lock_init(&gaudi->hw_queues_lock);
1033         mutex_init(&gaudi->clk_gate_mutex);
1034
1035         hdev->supports_sync_stream = true;
1036         hdev->supports_coresight = true;
1037
1038         return 0;
1039
1040 free_cpu_accessible_dma_pool:
1041         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1042 free_cpu_dma_mem:
1043         GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1044                                 hdev->cpu_pci_msb_addr);
1045         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1046                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1047                         hdev->cpu_accessible_dma_mem,
1048                         hdev->cpu_accessible_dma_address);
1049 free_dma_pool:
1050         dma_pool_destroy(hdev->dma_pool);
1051 free_gaudi_device:
1052         kfree(gaudi);
1053         return rc;
1054 }
1055
1056 static int gaudi_sw_fini(struct hl_device *hdev)
1057 {
1058         struct gaudi_device *gaudi = hdev->asic_specific;
1059
1060         gaudi_free_internal_qmans_pq_mem(hdev);
1061
1062         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1063
1064         GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1065                                         hdev->cpu_pci_msb_addr);
1066         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1067                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1068                         hdev->cpu_accessible_dma_mem,
1069                         hdev->cpu_accessible_dma_address);
1070
1071         dma_pool_destroy(hdev->dma_pool);
1072
1073         mutex_destroy(&gaudi->clk_gate_mutex);
1074
1075         kfree(gaudi);
1076
1077         return 0;
1078 }
1079
1080 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1081 {
1082         struct hl_device *hdev = arg;
1083         int i;
1084
1085         if (hdev->disabled)
1086                 return IRQ_HANDLED;
1087
1088         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1089                 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1090
1091         hl_irq_handler_eq(irq, &hdev->event_queue);
1092
1093         return IRQ_HANDLED;
1094 }
1095
1096 /*
1097  * For backward compatibility, new MSI interrupts should be set after the
1098  * existing CPU and NIC interrupts.
1099  */
1100 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1101                                 bool cpu_eq)
1102 {
1103         int msi_vec;
1104
1105         if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1106                 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1107                                 GAUDI_EVENT_QUEUE_MSI_IDX);
1108
1109         msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1110                         (nr + NIC_NUMBER_OF_ENGINES + 1);
1111
1112         return pci_irq_vector(hdev->pdev, msi_vec);
1113 }
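
/*
 * Mapping summary (added note): CQ interrupts 0..GAUDI_EVENT_QUEUE_MSI_IDX-1
 * map 1:1 onto MSI vectors, the CPU event queue uses vector
 * GAUDI_EVENT_QUEUE_MSI_IDX, and any newer interrupt index is pushed past
 * the NIC vectors, i.e. to (nr + NIC_NUMBER_OF_ENGINES + 1), to preserve
 * backward compatibility.
 */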
1114
1115 static int gaudi_enable_msi_single(struct hl_device *hdev)
1116 {
1117         int rc, irq;
1118
1119         dev_info(hdev->dev, "Working in single MSI IRQ mode\n");
1120
1121         irq = gaudi_pci_irq_vector(hdev, 0, false);
1122         rc = request_irq(irq, gaudi_irq_handler_single, 0,
1123                         "gaudi single msi", hdev);
1124         if (rc)
1125                 dev_err(hdev->dev,
1126                         "Failed to request single MSI IRQ\n");
1127
1128         return rc;
1129 }
1130
1131 static int gaudi_enable_msi_multi(struct hl_device *hdev)
1132 {
1133         int cq_cnt = hdev->asic_prop.completion_queues_count;
1134         int rc, i, irq_cnt_init, irq;
1135
1136         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1137                 irq = gaudi_pci_irq_vector(hdev, i, false);
1138                 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1139                                 &hdev->completion_queue[i]);
1140                 if (rc) {
1141                         dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1142                         goto free_irqs;
1143                 }
1144         }
1145
1146         irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1147         rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1148                                 &hdev->event_queue);
1149         if (rc) {
1150                 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1151                 goto free_irqs;
1152         }
1153
1154         return 0;
1155
1156 free_irqs:
1157         for (i = 0 ; i < irq_cnt_init ; i++)
1158                 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1159                                 &hdev->completion_queue[i]);
1160         return rc;
1161 }
1162
1163 static int gaudi_enable_msi(struct hl_device *hdev)
1164 {
1165         struct gaudi_device *gaudi = hdev->asic_specific;
1166         int rc;
1167
1168         if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1169                 return 0;
1170
1171         rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1172                                         PCI_IRQ_MSI);
1173         if (rc < 0) {
1174                 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1175                 return rc;
1176         }
1177
1178         if (rc < NUMBER_OF_INTERRUPTS) {
1179                 gaudi->multi_msi_mode = false;
1180                 rc = gaudi_enable_msi_single(hdev);
1181         } else {
1182                 gaudi->multi_msi_mode = true;
1183                 rc = gaudi_enable_msi_multi(hdev);
1184         }
1185
1186         if (rc)
1187                 goto free_pci_irq_vectors;
1188
1189         gaudi->hw_cap_initialized |= HW_CAP_MSI;
1190
1191         return 0;
1192
1193 free_pci_irq_vectors:
1194         pci_free_irq_vectors(hdev->pdev);
1195         return rc;
1196 }
1197
1198 static void gaudi_sync_irqs(struct hl_device *hdev)
1199 {
1200         struct gaudi_device *gaudi = hdev->asic_specific;
1201         int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1202
1203         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1204                 return;
1205
1206         /* Wait for all pending IRQs to be finished */
1207         if (gaudi->multi_msi_mode) {
1208                 for (i = 0 ; i < cq_cnt ; i++)
1209                         synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1210
1211                 synchronize_irq(gaudi_pci_irq_vector(hdev,
1212                                                 GAUDI_EVENT_QUEUE_MSI_IDX,
1213                                                 true));
1214         } else {
1215                 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1216         }
1217 }
1218
1219 static void gaudi_disable_msi(struct hl_device *hdev)
1220 {
1221         struct gaudi_device *gaudi = hdev->asic_specific;
1222         int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1223
1224         if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1225                 return;
1226
1227         gaudi_sync_irqs(hdev);
1228
1229         if (gaudi->multi_msi_mode) {
1230                 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1231                                                 true);
1232                 free_irq(irq, &hdev->event_queue);
1233
1234                 for (i = 0 ; i < cq_cnt ; i++) {
1235                         irq = gaudi_pci_irq_vector(hdev, i, false);
1236                         free_irq(irq, &hdev->completion_queue[i]);
1237                 }
1238         } else {
1239                 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1240         }
1241
1242         pci_free_irq_vectors(hdev->pdev);
1243
1244         gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1245 }
1246
1247 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1248 {
1249         struct gaudi_device *gaudi = hdev->asic_specific;
1250
1251         if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1252                 return;
1253
1254         if (!hdev->sram_scrambler_enable)
1255                 return;
1256
1257         WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1258                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1259         WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1260                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1261         WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1262                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1263         WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1264                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1265         WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1266                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1267         WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1268                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1269         WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1270                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1271         WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1272                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1273
1274         WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1275                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1276         WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1277                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1278         WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1279                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1280         WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1281                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1282         WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1283                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1284         WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1285                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1286         WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1287                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1288         WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1289                         1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1290
1291         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1292                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1293         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1294                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1295         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1296                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1297         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1298                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1299         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1300                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1301         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1302                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1303         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1304                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1305         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1306                         1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1307
1308         gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1309 }
1310
1311 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1312 {
1313         struct gaudi_device *gaudi = hdev->asic_specific;
1314
1315         if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1316                 return;
1317
1318         if (!hdev->dram_scrambler_enable)
1319                 return;
1320
1321         WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1322                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1323         WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1324                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1325         WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1326                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1327         WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1328                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1329         WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1330                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1331         WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1332                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1333         WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1334                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1335         WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1336                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1337
1338         WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1339                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1340         WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1341                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1342         WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1343                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1344         WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1345                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1346         WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1347                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1348         WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1349                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1350         WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1351                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1352         WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1353                         1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1354
1355         WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1356                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1357         WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1358                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1359         WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1360                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1361         WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1362                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1363         WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1364                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1365         WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1366                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1367         WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1368                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1369         WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1370                         1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1371
1372         gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1373 }
1374
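/*
 * Program the E2E (end-to-end) HBM and PCI read/write credit sizes for
 * every SIF/NIF router and DMA_IF down channel. When DRAM scrambling is
 * disabled, the NL_HBM selection registers are adjusted as well.
 * Finally, E2E is enabled on both the HBM and PCI paths.
 */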
1375 static void gaudi_init_e2e(struct hl_device *hdev)
1376 {
1377         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1378         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1379         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1380         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1381
1382         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1383         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1384         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1385         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1386
1387         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1388         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1389         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1390         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1391
1392         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1393         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1394         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1395         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1396
1397         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1398         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1399         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1400         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1401
1402         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1403         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1404         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1405         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1406
1407         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1408         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1409         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1410         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1411
1412         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
1413         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
1414         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
1415         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
1416
1417         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
1418         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
1419         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
1420         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
1421
1422         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1423         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1424         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1425         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1426
1427         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1428         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1429         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1430         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1431
1432         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1433         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1434         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1435         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1436
1437         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1438         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1439         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1440         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1441
1442         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1443         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1444         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1445         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1446
1447         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1448         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1449         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1450         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1451
1452         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
1453         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
1454         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
1455         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
1456
1457         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1458         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1459         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1460         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1461
1462         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1463         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1464         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1465         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1466
1467         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1468         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1469         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1470         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1471
1472         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1473         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1474         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1475         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1476
1477         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1478         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1479         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1480         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1481
1482         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1483         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1484         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1485         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1486
1487         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1488         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1489         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1490         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1491
1492         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1493         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1494         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1495         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1496
1497         if (!hdev->dram_scrambler_enable) {
1498                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1499                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1500                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1501                 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1502
1503                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1504                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1505                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1506                 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1507
1508                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1509                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1510                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1511                 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1512
1513                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1514                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1515                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1516                 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1517
1518                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1519                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1520                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1521                 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1522
1523                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1524                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1525                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1526                 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1527
1528                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1529                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1530                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1531                 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1532
1533                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1534                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1535                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1536                 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1537
1538                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1539                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1540                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1541                 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1542
1543                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1544                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1545                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1546                 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1547
1548                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1549                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1550                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1551                 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1552
1553                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1554                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1555                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1556                 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1557
1558                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1559                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1560                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1561                 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1562
1563                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1564                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1565                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1566                 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1567
1568                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1569                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1570                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1571                 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1572
1573                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1574                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1575                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1576                 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1577
1578                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1579                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1580                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1581                 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1582
1583                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1584                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1585                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1586                 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1587
1588                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1589                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1590                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1591                 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1592
1593                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1594                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1595                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1596                 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1597
1598                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1599                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1600                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1601                 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1602
1603                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1604                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1605                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1606                 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1607
1608                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1609                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1610                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1611                 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1612
1613                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1614                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1615                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1616                 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1617         }
1618
1619         WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
1620                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1621         WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
1622                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1623
1624         WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
1625                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1626         WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
1627                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1628
1629         WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
1630                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1631         WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
1632                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1633
1634         WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
1635                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1636         WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
1637                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1638
1639         WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
1640                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1641         WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
1642                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1643
1644         WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
1645                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1646         WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
1647                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1648
1649         WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
1650                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1651         WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
1652                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1653
1654         WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
1655                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1656         WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
1657                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1658
1659         WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
1660                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1661         WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
1662                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1663
1664         WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
1665                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1666         WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
1667                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1668
1669         WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
1670                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1671         WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
1672                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1673
1674         WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
1675                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1676         WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
1677                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1678
1679         WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
1680                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1681         WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
1682                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1683
1684         WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
1685                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1686         WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
1687                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1688
1689         WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
1690                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1691         WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
1692                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1693
1694         WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
1695                         1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1696         WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
1697                         1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1698
1699         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
1700                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1701         WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
1702                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1703
1704         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
1705                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1706         WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
1707                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1708
1709         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
1710                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1711         WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
1712                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1713
1714         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
1715                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1716         WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
1717                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1718
1719         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
1720                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1721         WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
1722                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1723
1724         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
1725                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1726         WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
1727                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1728
1729         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
1730                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1731         WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
1732                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1733
1734         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
1735                         1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1736         WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
1737                         1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1738 }
1739
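/*
 * Set the HBM0/HBM1 read and write credit counters in each DMA_IF
 * (E_N, E_S, W_N, W_S) and enable read/write credits on both channels.
 */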
1740 static void gaudi_init_hbm_cred(struct hl_device *hdev)
1741 {
1742         u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
1743
1744         hbm0_wr = 0x33333333;
1745         hbm0_rd = 0x77777777;
1746         hbm1_wr = 0x55555555;
1747         hbm1_rd = 0xDDDDDDDD;
1748
1749         WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
1750         WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
1751         WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
1752         WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
1753
1754         WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
1755         WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
1756         WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
1757         WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
1758
1759         WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
1760         WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
1761         WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
1762         WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
1763
1764         WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
1765         WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
1766         WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
1767         WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
1768
1769         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
1770                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1771                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1772         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
1773                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1774                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1775         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
1776                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1777                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1778         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
1779                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1780                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1781
1782         WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
1783                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1784                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1785         WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
1786                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1787                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1788         WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
1789                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1790                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1791         WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
1792                         (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1793                         (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1794 }
1795
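/*
 * Golden register configuration: E2E and HBM credits, TPC interrupt
 * masking and I-cache fetch size, clearing the start of SRAM and the
 * MME rollup counters. Clock gating is disabled while these registers
 * are written.
 */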
1796 static void gaudi_init_golden_registers(struct hl_device *hdev)
1797 {
1798         u32 tpc_offset;
1799         int tpc_id, i;
1800
1801         gaudi_init_e2e(hdev);
1802
1803         gaudi_init_hbm_cred(hdev);
1804
1805         hdev->asic_funcs->disable_clock_gating(hdev);
1806
1807         for (tpc_id = 0, tpc_offset = 0;
1808                                 tpc_id < TPC_NUMBER_OF_ENGINES;
1809                                 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
1810                 /* Mask all arithmetic interrupts from TPC */
1811                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
1812                 /* Set 16 cache lines */
1813                 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
1814                                 ICACHE_FETCH_LINE_NUM, 2);
1815         }
1816
1817         /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
1818         for (i = 0 ; i < 128 ; i += 8)
1819                 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
1820
1821         WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1822         WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1823         WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1824         WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1825 }
1826
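/*
 * Configure a single stream of a PCI DMA QMAN: PQ base and size, LDMA
 * offsets and the sync manager message base addresses. The error/RAZWI
 * IRQ, arbiter watchdog and protection bits are configured only once
 * per QMAN, on stream 0.
 */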
1827 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
1828                                         int qman_id, dma_addr_t qman_pq_addr)
1829 {
1830         u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
1831         u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
1832         u32 q_off, dma_qm_offset;
1833         u32 dma_qm_err_cfg;
1834
1835         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1836
1837         mtr_base_en_lo = lower_32_bits(CFG_BASE +
1838                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1839         mtr_base_en_hi = upper_32_bits(CFG_BASE +
1840                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1841         so_base_en_lo = lower_32_bits(CFG_BASE +
1842                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1843         so_base_en_hi = upper_32_bits(CFG_BASE +
1844                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1845         mtr_base_ws_lo = lower_32_bits(CFG_BASE +
1846                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1847         mtr_base_ws_hi = upper_32_bits(CFG_BASE +
1848                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1849         so_base_ws_lo = lower_32_bits(CFG_BASE +
1850                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1851         so_base_ws_hi = upper_32_bits(CFG_BASE +
1852                                 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1853
1854         q_off = dma_qm_offset + qman_id * 4;
1855
1856         WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
1857         WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
1858
1859         WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
1860         WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
1861         WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
1862
1863         WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
1864         WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
1865         WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
1866
1867         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
1868         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
1869         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
1870         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
1871         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
1872         WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
1873         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
1874         WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
1875
1876         WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
1877
1878         /* The following configuration is needed only once per QMAN */
1879         if (qman_id == 0) {
1880                 /* Configure RAZWI IRQ */
1881                 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
1882                 if (hdev->stop_on_err) {
1883                         dma_qm_err_cfg |=
1884                                 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
1885                 }
1886
1887                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
1888                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
1889                         lower_32_bits(CFG_BASE +
1890                                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1891                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
1892                         upper_32_bits(CFG_BASE +
1893                                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1894                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
1895                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
1896                                                                         dma_id);
1897
1898                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
1899                                 QM_ARB_ERR_MSG_EN_MASK);
1900
1901                 /* Increase ARB WDT to support streams architecture */
1902                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
1903                                 GAUDI_ARB_WDT_TIMEOUT);
1904
1905                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
1906                                 QMAN_EXTERNAL_MAKE_TRUSTED);
1907
1908                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
1909         }
1910 }
1911
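/*
 * Configure the DMA core engine: maximum outstanding reads, error
 * reporting towards the GIC, protection, MMU bypass for the secured
 * channel and, finally, enabling the core.
 */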
1912 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
1913 {
1914         u32 dma_offset = dma_id * DMA_CORE_OFFSET;
1915         u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
1916
1917         /* Set to maximum possible according to physical size */
1918         WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
1919         WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
1920
1921         /* STOP_ON bit means the operation gets no completion in case of RAZWI */
1922         if (hdev->stop_on_err)
1923                 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
1924
1925         WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
1926         WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
1927                 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1928         WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
1929                 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1930         WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
1931                 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
1932         WREG32(mmDMA0_CORE_PROT + dma_offset,
1933                         1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
1934         /* If the channel is secured, it should be in MMU bypass mode */
1935         WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
1936                         1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
1937         WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
1938 }
1939
1940 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
1941                                 u32 enable_mask)
1942 {
1943         u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1944
1945         WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
1946 }
1947
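/*
 * Initialize all PCI DMA QMANs: assign a CQ index and an MSI vector to
 * each of the four streams, configure the streams and the DMA core, and
 * enable each QMAN.
 */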
1948 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
1949 {
1950         struct gaudi_device *gaudi = hdev->asic_specific;
1951         struct hl_hw_queue *q;
1952         int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
1953
1954         if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
1955                 return;
1956
1957         for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
1958                 dma_id = gaudi_dma_assignment[i];
1959                 /*
1960                  * For queues after the CPU Q we need to add 1 to get the
1961                  * correct queue index. In addition, we need to account for
1962                  * the CPU EQ and NIC IRQs to get the correct MSI register.
1963                  */
1964                 if (dma_id > 1) {
1965                         cpu_skip = 1;
1966                         nic_skip = NIC_NUMBER_OF_ENGINES;
1967                 } else {
1968                         cpu_skip = 0;
1969                         nic_skip = 0;
1970                 }
1971
1972                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
1973                         q_idx = 4 * dma_id + j + cpu_skip;
1974                         q = &hdev->kernel_queues[q_idx];
1975                         q->cq_id = cq_id++;
1976                         q->msi_vec = nic_skip + cpu_skip + msi_vec++;
1977                         gaudi_init_pci_dma_qman(hdev, dma_id, j,
1978                                                 q->bus_address);
1979                 }
1980
1981                 gaudi_init_dma_core(hdev, dma_id);
1982
1983                 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
1984         }
1985
1986         gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
1987 }
1988
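/*
 * Configure a single stream of an HBM DMA QMAN. Streams 0-3 get their
 * PQ configured; stream 4 is the lower CP and gets the error/RAZWI IRQ,
 * arbiter watchdog and protection configuration instead.
 */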
1989 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
1990                                         int qman_id, u64 qman_base_addr)
1991 {
1992         u32 mtr_base_lo, mtr_base_hi;
1993         u32 so_base_lo, so_base_hi;
1994         u32 q_off, dma_qm_offset;
1995         u32 dma_qm_err_cfg;
1996
1997         dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1998
1999         mtr_base_lo = lower_32_bits(CFG_BASE +
2000                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2001         mtr_base_hi = upper_32_bits(CFG_BASE +
2002                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2003         so_base_lo = lower_32_bits(CFG_BASE +
2004                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2005         so_base_hi = upper_32_bits(CFG_BASE +
2006                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2007
2008         q_off = dma_qm_offset + qman_id * 4;
2009
2010         if (qman_id < 4) {
2011                 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2012                                         lower_32_bits(qman_base_addr));
2013                 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2014                                         upper_32_bits(qman_base_addr));
2015
2016                 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2017                 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2018                 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2019
2020                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2021                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2022                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2023         } else {
2024                 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2025                 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2026                 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2027
2028                 /* Configure RAZWI IRQ */
2029                 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2030                 if (hdev->stop_on_err) {
2031                         dma_qm_err_cfg |=
2032                                 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2033                 }
2034                 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2035
2036                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2037                         lower_32_bits(CFG_BASE +
2038                                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2039                 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2040                         upper_32_bits(CFG_BASE +
2041                                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2042                 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2043                         gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2044                                                                         dma_id);
2045
2046                 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2047                                 QM_ARB_ERR_MSG_EN_MASK);
2048
2049                 /* Increase ARB WDT to support streams architecture */
2050                 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2051                                 GAUDI_ARB_WDT_TIMEOUT);
2052
2053                 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2054                 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2055                                 QMAN_INTERNAL_MAKE_TRUSTED);
2056         }
2057
2058         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2059         WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2060         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2061         WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2062 }
2063
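/*
 * Initialize all HBM DMA QMANs: the four upper-CP streams plus the
 * lower CP of each channel, the DMA core, and enable each QMAN.
 */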
2064 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2065 {
2066         struct gaudi_device *gaudi = hdev->asic_specific;
2067         struct gaudi_internal_qman_info *q;
2068         u64 qman_base_addr;
2069         int i, j, dma_id, internal_q_index;
2070
2071         if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2072                 return;
2073
2074         for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2075                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2076
2077                 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2078                          /*
2079                           * Add the CPU queue in order to get the correct queue
2080                           * number as all internal queues are placed after it
2081                           */
2082                         internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2083
2084                         q = &gaudi->internal_qmans[internal_q_index];
2085                         qman_base_addr = (u64) q->pq_dma_addr;
2086                         gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2087                                                 qman_base_addr);
2088                 }
2089
2090                 /* Initializing lower CP for HBM DMA QMAN */
2091                 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2092
2093                 gaudi_init_dma_core(hdev, dma_id);
2094
2095                 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2096         }
2097
2098         gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2099 }
2100
2101 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2102                                         int qman_id, u64 qman_base_addr)
2103 {
2104         u32 mtr_base_lo, mtr_base_hi;
2105         u32 so_base_lo, so_base_hi;
2106         u32 q_off, mme_id;
2107         u32 mme_qm_err_cfg;
2108
2109         mtr_base_lo = lower_32_bits(CFG_BASE +
2110                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2111         mtr_base_hi = upper_32_bits(CFG_BASE +
2112                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2113         so_base_lo = lower_32_bits(CFG_BASE +
2114                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2115         so_base_hi = upper_32_bits(CFG_BASE +
2116                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2117
2118         q_off = mme_offset + qman_id * 4;
2119
2120         if (qman_id < 4) {
2121                 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2122                                         lower_32_bits(qman_base_addr));
2123                 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2124                                         upper_32_bits(qman_base_addr));
2125
2126                 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2127                 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2128                 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2129
2130                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2131                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2132                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2133         } else {
2134                 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2135                 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2136                 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2137
2138                 /* Configure RAZWI IRQ */
2139                 mme_id = mme_offset /
2140                                 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2141
2142                 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2143                 if (hdev->stop_on_err) {
2144                         mme_qm_err_cfg |=
2145                                 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2146                 }
2147                 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2148                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2149                         lower_32_bits(CFG_BASE +
2150                                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2151                 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2152                         upper_32_bits(CFG_BASE +
2153                                         mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2154                 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2155                         gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2156                                                                         mme_id);
2157
2158                 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2159                                 QM_ARB_ERR_MSG_EN_MASK);
2160
2161                 /* Increase ARB WDT to support streams architecture */
2162                 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2163                                 GAUDI_ARB_WDT_TIMEOUT);
2164
2165                 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2166                 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2167                                 QMAN_INTERNAL_MAKE_TRUSTED);
2168         }
2169
2170         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2171         WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2172         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2173         WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2174 }
2175
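/*
 * Initialize the MME0 and MME2 QMANs: four streams each plus the lower
 * CP, then enable both QMANs.
 */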
2176 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2177 {
2178         struct gaudi_device *gaudi = hdev->asic_specific;
2179         struct gaudi_internal_qman_info *q;
2180         u64 qman_base_addr;
2181         u32 mme_offset;
2182         int i, internal_q_index;
2183
2184         if (gaudi->hw_cap_initialized & HW_CAP_MME)
2185                 return;
2186
2187         /*
2188          * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2189          * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2190          */
2191
2192         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2193
2194         for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2195                 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2196                 q = &gaudi->internal_qmans[internal_q_index];
2197                 qman_base_addr = (u64) q->pq_dma_addr;
2198                 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2199                                         qman_base_addr);
2200                 if (i == 3)
2201                         mme_offset = 0;
2202         }
2203
2204         /* Initializing lower CP for MME QMANs */
2205         mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2206         gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2207         gaudi_init_mme_qman(hdev, 0, 4, 0);
2208
2209         WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2210         WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2211
2212         gaudi->hw_cap_initialized |= HW_CAP_MME;
2213 }
2214
2215 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2216                                 int qman_id, u64 qman_base_addr)
2217 {
2218         u32 mtr_base_lo, mtr_base_hi;
2219         u32 so_base_lo, so_base_hi;
2220         u32 q_off, tpc_id;
2221         u32 tpc_qm_err_cfg;
2222
2223         mtr_base_lo = lower_32_bits(CFG_BASE +
2224                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2225         mtr_base_hi = upper_32_bits(CFG_BASE +
2226                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2227         so_base_lo = lower_32_bits(CFG_BASE +
2228                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2229         so_base_hi = upper_32_bits(CFG_BASE +
2230                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2231
2232         q_off = tpc_offset + qman_id * 4;
2233
2234         if (qman_id < 4) {
2235                 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2236                                         lower_32_bits(qman_base_addr));
2237                 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2238                                         upper_32_bits(qman_base_addr));
2239
2240                 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2241                 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2242                 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2243
2244                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2245                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2246                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2247         } else {
2248                 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2249                 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2250                 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2251
2252                 /* Configure RAZWI IRQ */
2253                 tpc_id = tpc_offset /
2254                                 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2255
2256                 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2257                 if (hdev->stop_on_err) {
2258                         tpc_qm_err_cfg |=
2259                                 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2260                 }
2261
2262                 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2263                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2264                         lower_32_bits(CFG_BASE +
2265                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2266                 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2267                         upper_32_bits(CFG_BASE +
2268                                 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2269                 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2270                         gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2271                                                                         tpc_id);
2272
2273                 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2274                                 QM_ARB_ERR_MSG_EN_MASK);
2275
2276                 /* Increase ARB WDT to support streams architecture */
2277                 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2278                                 GAUDI_ARB_WDT_TIMEOUT);
2279
2280                 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2281                 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2282                                 QMAN_INTERNAL_MAKE_TRUSTED);
2283         }
2284
2285         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2286         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2287         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2288         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2289 }
2290
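/*
 * Initialize the QMANs of all TPC engines: four streams plus the lower
 * CP per TPC, set the upper bits of the sync manager base address and
 * enable each TPC QMAN.
 */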
2291 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2292 {
2293         struct gaudi_device *gaudi = hdev->asic_specific;
2294         struct gaudi_internal_qman_info *q;
2295         u64 qman_base_addr;
2296         u32 so_base_hi, tpc_offset = 0;
2297         u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2298                         mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2299         int i, tpc_id, internal_q_index;
2300
2301         if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2302                 return;
2303
2304         so_base_hi = upper_32_bits(CFG_BASE +
2305                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2306
2307         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2308                 for (i = 0 ; i < QMAN_STREAMS ; i++) {
2309                         internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2310                                                 tpc_id * QMAN_STREAMS + i;
2311                         q = &gaudi->internal_qmans[internal_q_index];
2312                         qman_base_addr = (u64) q->pq_dma_addr;
2313                         gaudi_init_tpc_qman(hdev, tpc_offset, i,
2314                                                 qman_base_addr);
2315
2316                         if (i == 3) {
2317                                 /* Initializing lower CP for TPC QMAN */
2318                                 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2319
2320                                 /* Enable the QMAN and TPC channel */
2321                                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2322                                                 QMAN_TPC_ENABLE);
2323                         }
2324                 }
2325
2326                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2327                                 so_base_hi);
2328
2329                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2330
2331                 gaudi->hw_cap_initialized |=
2332                                 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
2333         }
2334 }
2335
2336 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
2337 {
2338         struct gaudi_device *gaudi = hdev->asic_specific;
2339
2340         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2341                 return;
2342
2343         WREG32(mmDMA0_QM_GLBL_CFG0, 0);
2344         WREG32(mmDMA1_QM_GLBL_CFG0, 0);
2345         WREG32(mmDMA5_QM_GLBL_CFG0, 0);
2346 }
2347
2348 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
2349 {
2350         struct gaudi_device *gaudi = hdev->asic_specific;
2351
2352         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2353                 return;
2354
2355         WREG32(mmDMA2_QM_GLBL_CFG0, 0);
2356         WREG32(mmDMA3_QM_GLBL_CFG0, 0);
2357         WREG32(mmDMA4_QM_GLBL_CFG0, 0);
2358         WREG32(mmDMA6_QM_GLBL_CFG0, 0);
2359         WREG32(mmDMA7_QM_GLBL_CFG0, 0);
2360 }
2361
2362 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
2363 {
2364         struct gaudi_device *gaudi = hdev->asic_specific;
2365
2366         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2367                 return;
2368
2369         WREG32(mmMME2_QM_GLBL_CFG0, 0);
2370         WREG32(mmMME0_QM_GLBL_CFG0, 0);
2371 }
2372
2373 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
2374 {
2375         struct gaudi_device *gaudi = hdev->asic_specific;
2376         u32 tpc_offset = 0;
2377         int tpc_id;
2378
2379         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2380                 return;
2381
2382         for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2383                 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
2384                 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2385         }
2386 }
2387
2388 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
2389 {
2390         struct gaudi_device *gaudi = hdev->asic_specific;
2391
2392         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2393                 return;
2394
2395         /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
2396         WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2397         WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2398         WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2399 }
2400
2401 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
2402 {
2403         struct gaudi_device *gaudi = hdev->asic_specific;
2404
2405         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2406                 return;
2407
2408         /* Stop CPs of HBM DMA QMANs */
2409
2410         WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2411         WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2412         WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2413         WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2414         WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2415 }
2416
2417 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
2418 {
2419         struct gaudi_device *gaudi = hdev->asic_specific;
2420
2421         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2422                 return;
2423
2424         /* Stop CPs of MME QMANs */
2425         WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2426         WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2427 }
2428
2429 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
2430 {
2431         struct gaudi_device *gaudi = hdev->asic_specific;
2432
2433         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2434                 return;
2435
2436         WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2437         WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2438         WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2439         WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2440         WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2441         WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2442         WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2443         WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2444 }
2445
2446 static void gaudi_pci_dma_stall(struct hl_device *hdev)
2447 {
2448         struct gaudi_device *gaudi = hdev->asic_specific;
2449
2450         if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2451                 return;
2452
2453         WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2454         WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2455         WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2456 }
2457
2458 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
2459 {
2460         struct gaudi_device *gaudi = hdev->asic_specific;
2461
2462         if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2463                 return;
2464
2465         WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2466         WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2467         WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2468         WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2469         WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2470 }
2471
2472 static void gaudi_mme_stall(struct hl_device *hdev)
2473 {
2474         struct gaudi_device *gaudi = hdev->asic_specific;
2475
2476         if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2477                 return;
2478
2479         /* WA for H3-1800 bug: do ACC and SBAB writes twice */
2480         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2481         WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2482         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2483         WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2484         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2485         WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2486         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2487         WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2488         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2489         WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2490         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2491         WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2492         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2493         WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2494         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2495         WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2496 }
2497
2498 static void gaudi_tpc_stall(struct hl_device *hdev)
2499 {
2500         struct gaudi_device *gaudi = hdev->asic_specific;
2501
2502         if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2503                 return;
2504
2505         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2506         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2507         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2508         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2509         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2510         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2511         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2512         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2513 }
2514
2515 static void gaudi_set_clock_gating(struct hl_device *hdev)
2516 {
2517         struct gaudi_device *gaudi = hdev->asic_specific;
2518         u32 qman_offset;
2519         bool enable;
2520         int i;
2521
2522         /* If a debug session is in progress, don't enable clock gating
2523          * as it may interfere with the debugger
2524          */
2525         if (hdev->in_debug)
2526                 return;
2527
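        /* Clock gating is enabled per engine according to the user-controlled
         * clock_gating_mask. The PCI DMA QMANs are configured with
         * QMAN_UPPER_CP_CGM_PWR_GATE_EN, while the HBM DMA, MME and TPC QMANs
         * use QMAN_COMMON_CP_CGM_PWR_GATE_EN.
         */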
2528         for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) {
2529                 enable = !!(hdev->clock_gating_mask &
2530                                 (BIT_ULL(gaudi_dma_assignment[i])));
2531
2532                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2533                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2534                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2535                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2536                                 enable ? QMAN_UPPER_CP_CGM_PWR_GATE_EN : 0);
2537         }
2538
2539         for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) {
2540                 enable = !!(hdev->clock_gating_mask &
2541                                 (BIT_ULL(gaudi_dma_assignment[i])));
2542
2543                 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2544                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
2545                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2546                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2547                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2548         }
2549
2550         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0)));
2551         WREG32(mmMME0_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2552         WREG32(mmMME0_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2553
2554         enable = !!(hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2)));
2555         WREG32(mmMME2_QM_CGM_CFG1, enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2556         WREG32(mmMME2_QM_CGM_CFG, enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2557
2558         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2559                 enable = !!(hdev->clock_gating_mask &
2560                                 (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)));
2561
2562                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
2563                                 enable ? QMAN_CGM1_PWR_GATE_EN : 0);
2564                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
2565                                 enable ? QMAN_COMMON_CP_CGM_PWR_GATE_EN : 0);
2566
2567                 qman_offset += TPC_QMAN_OFFSET;
2568         }
2569
2570         gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
2571 }
2572
2573 static void gaudi_disable_clock_gating(struct hl_device *hdev)
2574 {
2575         struct gaudi_device *gaudi = hdev->asic_specific;
2576         u32 qman_offset;
2577         int i;
2578
2579         if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
2580                 return;
2581
2582         for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
2583                 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
2584                 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
2585
2586                 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
2587         }
2588
2589         WREG32(mmMME0_QM_CGM_CFG, 0);
2590         WREG32(mmMME0_QM_CGM_CFG1, 0);
2591         WREG32(mmMME2_QM_CGM_CFG, 0);
2592         WREG32(mmMME2_QM_CGM_CFG1, 0);
2593
2594         for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2595                 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
2596                 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
2597
2598                 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
2599         }
2600
2601         gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
2602 }
2603
2604 static void gaudi_enable_timestamp(struct hl_device *hdev)
2605 {
2606         /* Disable the timestamp counter */
2607         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2608
2609         /* Zero the lower/upper parts of the 64-bit counter */
2610         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2611         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2612
2613         /* Enable the counter */
2614         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2615 }
2616
2617 static void gaudi_disable_timestamp(struct hl_device *hdev)
2618 {
2619         /* Disable the timestamp counter */
2620         WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2621 }
2622
2623 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
2624 {
2625         u32 wait_timeout_ms;
2626
2627         dev_info(hdev->dev,
2628                 "Halting compute engines and disabling interrupts\n");
2629
2630         if (hdev->pldm)
2631                 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2632         else
2633                 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
2634
2635
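        /* Halt order: first stop the QMANs so no new work is dispatched,
         * then stall the engines themselves, and only then disable the
         * QMANs, the timestamp counter and MSI.
         */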
2636         gaudi_stop_mme_qmans(hdev);
2637         gaudi_stop_tpc_qmans(hdev);
2638         gaudi_stop_hbm_dma_qmans(hdev);
2639         gaudi_stop_pci_dma_qmans(hdev);
2640
2641         hdev->asic_funcs->disable_clock_gating(hdev);
2642
2643         msleep(wait_timeout_ms);
2644
2645         gaudi_pci_dma_stall(hdev);
2646         gaudi_hbm_dma_stall(hdev);
2647         gaudi_tpc_stall(hdev);
2648         gaudi_mme_stall(hdev);
2649
2650         msleep(wait_timeout_ms);
2651
2652         gaudi_disable_mme_qmans(hdev);
2653         gaudi_disable_tpc_qmans(hdev);
2654         gaudi_disable_hbm_dma_qmans(hdev);
2655         gaudi_disable_pci_dma_qmans(hdev);
2656
2657         gaudi_disable_timestamp(hdev);
2658
2659         gaudi_disable_msi(hdev);
2660 }
2661
2662 static int gaudi_mmu_init(struct hl_device *hdev)
2663 {
2664         struct asic_fixed_properties *prop = &hdev->asic_prop;
2665         struct gaudi_device *gaudi = hdev->asic_specific;
2666         u64 hop0_addr;
2667         int rc, i;
2668
2669         if (!hdev->mmu_enable)
2670                 return 0;
2671
2672         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
2673                 return 0;
2674
2675         hdev->dram_supports_virtual_memory = false;
2676
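        /* Each ASID gets its own hop-0 page table; the tables are laid out
         * consecutively starting at mmu_pgt_addr, mmu_hop_table_size bytes
         * apart.
         */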
2677         for (i = 0 ; i < prop->max_asid ; i++) {
2678                 hop0_addr = prop->mmu_pgt_addr +
2679                                 (i * prop->mmu_hop_table_size);
2680
2681                 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2682                 if (rc) {
2683                         dev_err(hdev->dev,
2684                                 "failed to set hop0 addr for asid %d\n", i);
2685                         goto err;
2686                 }
2687         }
2688
2689         /* Init the MMU cache management page */
2690         WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2691         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2692
2693         hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
2694
2695         WREG32(mmMMU_UP_MMU_ENABLE, 1);
2696         WREG32(mmMMU_UP_SPI_MASK, 0xF);
2697
2698         WREG32(mmSTLB_HOP_CONFIGURATION,
2699                         hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
2700
2701         /*
2702          * The H/W expects the first PI after init to be 1. After wraparound
2703          * we'll write 0.
2704          */
2705         gaudi->mmu_cache_inv_pi = 1;
2706
2707         gaudi->hw_cap_initialized |= HW_CAP_MMU;
2708
2709         return 0;
2710
2711 err:
2712         return rc;
2713 }
2714
2715 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
2716 {
2717         void __iomem *dst;
2718
2719         /* HBM scrambler must be initialized before pushing F/W to HBM */
2720         gaudi_init_scrambler_hbm(hdev);
2721
2722         dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
2723
2724         return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst);
2725 }
2726
2727 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
2728 {
2729         void __iomem *dst;
2730
2731         dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2732
2733         return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst);
2734 }
2735
2736 static void gaudi_read_device_fw_version(struct hl_device *hdev,
2737                                         enum hl_fw_component fwc)
2738 {
2739         const char *name;
2740         u32 ver_off;
2741         char *dest;
2742
2743         switch (fwc) {
2744         case FW_COMP_UBOOT:
2745                 ver_off = RREG32(mmUBOOT_VER_OFFSET);
2746                 dest = hdev->asic_prop.uboot_ver;
2747                 name = "U-Boot";
2748                 break;
2749         case FW_COMP_PREBOOT:
2750                 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2751                 dest = hdev->asic_prop.preboot_ver;
2752                 name = "Preboot";
2753                 break;
2754         default:
2755                 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2756                 return;
2757         }
2758
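        /* The version offset register holds an absolute SRAM address; strip
         * the SRAM base to get an offset and verify the version string fits
         * inside SRAM before copying it out through the BAR.
         */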
2759         ver_off &= ~((u32)SRAM_BASE_ADDR);
2760
2761         if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2762                 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
2763                                                         VERSION_MAX_LEN);
2764         } else {
2765                 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2766                                                                 name, ver_off);
2767                 strcpy(dest, "unavailable");
2768         }
2769 }
2770
2771 static int gaudi_init_cpu(struct hl_device *hdev)
2772 {
2773         struct gaudi_device *gaudi = hdev->asic_specific;
2774         int rc;
2775
2776         if (!hdev->cpu_enable)
2777                 return 0;
2778
2779         if (gaudi->hw_cap_initialized & HW_CAP_CPU)
2780                 return 0;
2781
2782         /*
2783          * The device CPU works with 40-bit addresses.
2784          * This register sets the extension to 50 bits.
2785          */
2786         WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
2787
2788         rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2789                         mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
2790                         mmCPU_CMD_STATUS_TO_HOST,
2791                         mmCPU_BOOT_ERR0,
2792                         !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
2793                         GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
2794
2795         if (rc)
2796                 return rc;
2797
2798         gaudi->hw_cap_initialized |= HW_CAP_CPU;
2799
2800         return 0;
2801 }
2802
2803 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
2804 {
2805         struct gaudi_device *gaudi = hdev->asic_specific;
2806         struct hl_eq *eq;
2807         u32 status;
2808         struct hl_hw_queue *cpu_pq =
2809                         &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
2810         int err;
2811
2812         if (!hdev->cpu_queues_enable)
2813                 return 0;
2814
2815         if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
2816                 return 0;
2817
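        /* Hand the queue addresses to the device CPU: program the PQ/EQ/CQ
         * base addresses and lengths, report the ready-for-CP status (single
         * or multi MSI variant), then kick the device CPU via a GIC SPI and
         * poll until it reports PQ_INIT_STATUS_READY_FOR_HOST.
         */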
2818         eq = &hdev->event_queue;
2819
2820         WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
2821         WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
2822
2823         WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
2824         WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
2825
2826         WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
2827                         lower_32_bits(hdev->cpu_accessible_dma_address));
2828         WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
2829                         upper_32_bits(hdev->cpu_accessible_dma_address));
2830
2831         WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
2832         WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
2833         WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
2834
2835         /* Used for EQ CI */
2836         WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
2837
2838         WREG32(mmCPU_IF_PF_PQ_PI, 0);
2839
2840         if (gaudi->multi_msi_mode)
2841                 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
2842         else
2843                 WREG32(mmCPU_IF_QUEUE_INIT,
2844                         PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
2845
2846         WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
2847
2848         err = hl_poll_timeout(
2849                 hdev,
2850                 mmCPU_IF_QUEUE_INIT,
2851                 status,
2852                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
2853                 1000,
2854                 cpu_timeout);
2855
2856         if (err) {
2857                 dev_err(hdev->dev,
2858                         "Failed to communicate with Device CPU (CPU-CP timeout)\n");
2859                 return -EIO;
2860         }
2861
2862         gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
2863         return 0;
2864 }
2865
2866 static void gaudi_pre_hw_init(struct hl_device *hdev)
2867 {
2868         /* Perform read from the device to make sure device is up */
2869         RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2870
2871         /*
2872          * Let's mark in the H/W that we have reached this point. We check
2873          * this value in the reset_before_init function to understand whether
2874          * we need to reset the chip before doing H/W init. This register is
2875          * cleared by the H/W upon H/W reset
2876          */
2877         WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2878
2879         /* Set the access through PCI bars (Linux driver only) as secured */
2880         WREG32(mmPCIE_WRAP_LBW_PROT_OVR, (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
2881                                         PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
2882
2883         /* Perform read to flush the waiting writes to ensure configuration
2884          * was set in the device
2885          */
2886         RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
2887
2888         if (hdev->axi_drain) {
2889                 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG,
2890                         1 << PCIE_WRAP_LBW_DRAIN_CFG_EN_SHIFT);
2891                 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG,
2892                         1 << PCIE_WRAP_HBW_DRAIN_CFG_EN_SHIFT);
2893
2894                 /* Perform read to flush the DRAIN cfg */
2895                 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
2896         } else {
2897                 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG, 0);
2898                 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG, 0);
2899
2900                 /* Perform read to flush the DRAIN cfg */
2901                 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
2902         }
2903
2904         /* Configure the reset registers. Must be done as early as possible
2905          * in case we fail during H/W initialization
2906          */
2907         WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
2908                                         (CFG_RST_H_DMA_MASK |
2909                                         CFG_RST_H_MME_MASK |
2910                                         CFG_RST_H_SM_MASK |
2911                                         CFG_RST_H_TPC_7_MASK));
2912
2913         WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
2914
2915         WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
2916                                         (CFG_RST_H_HBM_MASK |
2917                                         CFG_RST_H_TPC_7_MASK |
2918                                         CFG_RST_H_NIC_MASK |
2919                                         CFG_RST_H_SM_MASK |
2920                                         CFG_RST_H_DMA_MASK |
2921                                         CFG_RST_H_MME_MASK |
2922                                         CFG_RST_H_CPU_MASK |
2923                                         CFG_RST_H_MMU_MASK));
2924
2925         WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
2926                                         (CFG_RST_L_IF_MASK |
2927                                         CFG_RST_L_PSOC_MASK |
2928                                         CFG_RST_L_TPC_MASK));
2929 }
2930
2931 static int gaudi_hw_init(struct hl_device *hdev)
2932 {
2933         int rc;
2934
2935         dev_info(hdev->dev, "Starting initialization of H/W\n");
2936
2937         gaudi_pre_hw_init(hdev);
2938
2939         gaudi_init_pci_dma_qmans(hdev);
2940
2941         gaudi_init_hbm_dma_qmans(hdev);
2942
2943         rc = gaudi_init_cpu(hdev);
2944         if (rc) {
2945                 dev_err(hdev->dev, "failed to initialize CPU\n");
2946                 return rc;
2947         }
2948
2949         /* SRAM scrambler must be initialized after CPU is running from HBM */
2950         gaudi_init_scrambler_sram(hdev);
2951
2952         /* This is here just in case we are working without the device CPU */
2953         gaudi_init_scrambler_hbm(hdev);
2954
2955         gaudi_init_golden_registers(hdev);
2956
2957         rc = gaudi_mmu_init(hdev);
2958         if (rc)
2959                 return rc;
2960
2961         gaudi_init_security(hdev);
2962
2963         gaudi_init_mme_qmans(hdev);
2964
2965         gaudi_init_tpc_qmans(hdev);
2966
2967         hdev->asic_funcs->set_clock_gating(hdev);
2968
2969         gaudi_enable_timestamp(hdev);
2970
2971         /* MSI must be enabled before CPU queues are initialized */
2972         rc = gaudi_enable_msi(hdev);
2973         if (rc)
2974                 goto disable_queues;
2975
2976         /* must be called after MSI was enabled */
2977         rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
2978         if (rc) {
2979                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
2980                         rc);
2981                 goto disable_msi;
2982         }
2983
2984         /* Perform read from the device to flush all configuration */
2985         RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2986
2987         return 0;
2988
2989 disable_msi:
2990         gaudi_disable_msi(hdev);
2991 disable_queues:
2992         gaudi_disable_mme_qmans(hdev);
2993         gaudi_disable_pci_dma_qmans(hdev);
2994
2995         return rc;
2996 }
2997
2998 static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
2999 {
3000         struct gaudi_device *gaudi = hdev->asic_specific;
3001         u32 status, reset_timeout_ms, cpu_timeout_ms, boot_strap = 0;
3002
3003         if (!hard_reset) {
3004                 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
3005                 return;
3006         }
3007
3008         if (hdev->pldm) {
3009                 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3010                 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3011         } else {
3012                 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3013                 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
3014         }
3015
3016         /* Set device to handle FLR by H/W as we will put the device CPU to
3017          * halt mode
3018          */
3019         WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
3020                                         PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
3021
3022         /* We don't know the state of the CPU, so make sure it is
3023          * stopped by any means necessary
3024          */
3025         WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
3026         WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_HALT_MACHINE);
3027
3028         msleep(cpu_timeout_ms);
3029
3030         /* Tell ASIC not to re-initialize PCIe */
3031         WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
3032
3033         boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
3034
3035         /* H/W bug WA:
3036          * rdata[31:0] = strap_read_val;
3037          * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
3038          */
3039         boot_strap = (((boot_strap & 0x7FE00000) << 1) |
3040                         (boot_strap & 0x001FFFFF));
3041         WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
3042
3043         /* Restart BTL/BLR upon hard-reset */
3044         WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
3045
3046         WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3047                         1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3048         dev_info(hdev->dev,
3049                 "Issued HARD reset command, going to wait %dms\n",
3050                 reset_timeout_ms);
3051
3052         /*
3053          * After hard reset, we can't poll the BTM_FSM register because the PSOC
3054          * itself is in reset. Need to wait until the reset is deasserted
3055          */
3056         msleep(reset_timeout_ms);
3057
3058         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3059         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3060                 dev_err(hdev->dev,
3061                         "Timeout while waiting for device to reset 0x%x\n",
3062                         status);
3063
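        /* Restore the boot strap pins value that was saved (and WA-adjusted)
         * before the reset was issued.
         */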
3064         WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
3065
3066         gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3067                                         HW_CAP_HBM | HW_CAP_PCI_DMA |
3068                                         HW_CAP_MME | HW_CAP_TPC_MASK |
3069                                         HW_CAP_HBM_DMA | HW_CAP_PLL |
3070                                         HW_CAP_MMU |
3071                                         HW_CAP_SRAM_SCRAMBLER |
3072                                         HW_CAP_HBM_SCRAMBLER |
3073                                         HW_CAP_CLK_GATE);
3074
3075         memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3076 }
3077
3078 static int gaudi_suspend(struct hl_device *hdev)
3079 {
3080         int rc;
3081
3082         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
3083         if (rc)
3084                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3085
3086         return rc;
3087 }
3088
3089 static int gaudi_resume(struct hl_device *hdev)
3090 {
3091         return gaudi_init_iatu(hdev);
3092 }
3093
3094 static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
3095                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
3096 {
3097         int rc;
3098
3099         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3100                         VM_DONTCOPY | VM_NORESERVE;
3101
3102         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
3103         if (rc)
3104                 dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);
3105
3106         return rc;
3107 }
3108
3109 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3110 {
3111         struct gaudi_device *gaudi = hdev->asic_specific;
3112         u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3113         int dma_id;
3114         bool invalid_queue = false;
3115
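        /* Each DMA QMAN exposes four PQ_PI registers, one per stream.
         * dma_qm_offset selects the physical DMA engine assigned to this
         * logical queue, and q_off selects the stream's PI register within
         * that engine's QMAN block. The "- 1" used from DMA_2 onwards
         * compensates for the CPU_PQ queue ID that is interleaved between
         * DMA_1 and DMA_2 in the queue enumeration.
         */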
3116         switch (hw_queue_id) {
3117         case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
3118                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
3119                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3120                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3121                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3122                 break;
3123
3124         case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
3125                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
3126                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3127                 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3128                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3129                 break;
3130
3131         case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
3132                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
3133                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3134                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3135                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3136                 break;
3137
3138         case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
3139                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
3140                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3141                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3142                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3143                 break;
3144
3145         case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
3146                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
3147                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3148                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3149                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3150                 break;
3151
3152         case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
3153                 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3];
3154                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3155                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3156                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3157                 break;
3158
3159         case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
3160                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
3161                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3162                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3163                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3164                 break;
3165
3166         case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
3167                 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
3168                 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3169                 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3170                 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3171                 break;
3172
3173         case GAUDI_QUEUE_ID_CPU_PQ:
3174                 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3175                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
3176                 else
3177                         invalid_queue = true;
3178                 break;
3179
3180         case GAUDI_QUEUE_ID_MME_0_0:
3181                 db_reg_offset = mmMME2_QM_PQ_PI_0;
3182                 break;
3183
3184         case GAUDI_QUEUE_ID_MME_0_1:
3185                 db_reg_offset = mmMME2_QM_PQ_PI_1;
3186                 break;
3187
3188         case GAUDI_QUEUE_ID_MME_0_2:
3189                 db_reg_offset = mmMME2_QM_PQ_PI_2;
3190                 break;
3191
3192         case GAUDI_QUEUE_ID_MME_0_3:
3193                 db_reg_offset = mmMME2_QM_PQ_PI_3;
3194                 break;
3195
3196         case GAUDI_QUEUE_ID_MME_1_0:
3197                 db_reg_offset = mmMME0_QM_PQ_PI_0;
3198                 break;
3199
3200         case GAUDI_QUEUE_ID_MME_1_1:
3201                 db_reg_offset = mmMME0_QM_PQ_PI_1;
3202                 break;
3203
3204         case GAUDI_QUEUE_ID_MME_1_2:
3205                 db_reg_offset = mmMME0_QM_PQ_PI_2;
3206                 break;
3207
3208         case GAUDI_QUEUE_ID_MME_1_3:
3209                 db_reg_offset = mmMME0_QM_PQ_PI_3;
3210                 break;
3211
3212         case GAUDI_QUEUE_ID_TPC_0_0:
3213                 db_reg_offset = mmTPC0_QM_PQ_PI_0;
3214                 break;
3215
3216         case GAUDI_QUEUE_ID_TPC_0_1:
3217                 db_reg_offset = mmTPC0_QM_PQ_PI_1;
3218                 break;
3219
3220         case GAUDI_QUEUE_ID_TPC_0_2:
3221                 db_reg_offset = mmTPC0_QM_PQ_PI_2;
3222                 break;
3223
3224         case GAUDI_QUEUE_ID_TPC_0_3:
3225                 db_reg_offset = mmTPC0_QM_PQ_PI_3;
3226                 break;
3227
3228         case GAUDI_QUEUE_ID_TPC_1_0:
3229                 db_reg_offset = mmTPC1_QM_PQ_PI_0;
3230                 break;
3231
3232         case GAUDI_QUEUE_ID_TPC_1_1:
3233                 db_reg_offset = mmTPC1_QM_PQ_PI_1;
3234                 break;
3235
3236         case GAUDI_QUEUE_ID_TPC_1_2:
3237                 db_reg_offset = mmTPC1_QM_PQ_PI_2;
3238                 break;
3239
3240         case GAUDI_QUEUE_ID_TPC_1_3:
3241                 db_reg_offset = mmTPC1_QM_PQ_PI_3;
3242                 break;
3243
3244         case GAUDI_QUEUE_ID_TPC_2_0:
3245                 db_reg_offset = mmTPC2_QM_PQ_PI_0;
3246                 break;
3247
3248         case GAUDI_QUEUE_ID_TPC_2_1:
3249                 db_reg_offset = mmTPC2_QM_PQ_PI_1;
3250                 break;
3251
3252         case GAUDI_QUEUE_ID_TPC_2_2:
3253                 db_reg_offset = mmTPC2_QM_PQ_PI_2;
3254                 break;
3255
3256         case GAUDI_QUEUE_ID_TPC_2_3:
3257                 db_reg_offset = mmTPC2_QM_PQ_PI_3;
3258                 break;
3259
3260         case GAUDI_QUEUE_ID_TPC_3_0:
3261                 db_reg_offset = mmTPC3_QM_PQ_PI_0;
3262                 break;
3263
3264         case GAUDI_QUEUE_ID_TPC_3_1:
3265                 db_reg_offset = mmTPC3_QM_PQ_PI_1;
3266                 break;
3267
3268         case GAUDI_QUEUE_ID_TPC_3_2:
3269                 db_reg_offset = mmTPC3_QM_PQ_PI_2;
3270                 break;
3271
3272         case GAUDI_QUEUE_ID_TPC_3_3:
3273                 db_reg_offset = mmTPC3_QM_PQ_PI_3;
3274                 break;
3275
3276         case GAUDI_QUEUE_ID_TPC_4_0:
3277                 db_reg_offset = mmTPC4_QM_PQ_PI_0;
3278                 break;
3279
3280         case GAUDI_QUEUE_ID_TPC_4_1:
3281                 db_reg_offset = mmTPC4_QM_PQ_PI_1;
3282                 break;
3283
3284         case GAUDI_QUEUE_ID_TPC_4_2:
3285                 db_reg_offset = mmTPC4_QM_PQ_PI_2;
3286                 break;
3287
3288         case GAUDI_QUEUE_ID_TPC_4_3:
3289                 db_reg_offset = mmTPC4_QM_PQ_PI_3;
3290                 break;
3291
3292         case GAUDI_QUEUE_ID_TPC_5_0:
3293                 db_reg_offset = mmTPC5_QM_PQ_PI_0;
3294                 break;
3295
3296         case GAUDI_QUEUE_ID_TPC_5_1:
3297                 db_reg_offset = mmTPC5_QM_PQ_PI_1;
3298                 break;
3299
3300         case GAUDI_QUEUE_ID_TPC_5_2:
3301                 db_reg_offset = mmTPC5_QM_PQ_PI_2;
3302                 break;
3303
3304         case GAUDI_QUEUE_ID_TPC_5_3:
3305                 db_reg_offset = mmTPC5_QM_PQ_PI_3;
3306                 break;
3307
3308         case GAUDI_QUEUE_ID_TPC_6_0:
3309                 db_reg_offset = mmTPC6_QM_PQ_PI_0;
3310                 break;
3311
3312         case GAUDI_QUEUE_ID_TPC_6_1:
3313                 db_reg_offset = mmTPC6_QM_PQ_PI_1;
3314                 break;
3315
3316         case GAUDI_QUEUE_ID_TPC_6_2:
3317                 db_reg_offset = mmTPC6_QM_PQ_PI_2;
3318                 break;
3319
3320         case GAUDI_QUEUE_ID_TPC_6_3:
3321                 db_reg_offset = mmTPC6_QM_PQ_PI_3;
3322                 break;
3323
3324         case GAUDI_QUEUE_ID_TPC_7_0:
3325                 db_reg_offset = mmTPC7_QM_PQ_PI_0;
3326                 break;
3327
3328         case GAUDI_QUEUE_ID_TPC_7_1:
3329                 db_reg_offset = mmTPC7_QM_PQ_PI_1;
3330                 break;
3331
3332         case GAUDI_QUEUE_ID_TPC_7_2:
3333                 db_reg_offset = mmTPC7_QM_PQ_PI_2;
3334                 break;
3335
3336         case GAUDI_QUEUE_ID_TPC_7_3:
3337                 db_reg_offset = mmTPC7_QM_PQ_PI_3;
3338                 break;
3339
3340         default:
3341                 invalid_queue = true;
3342         }
3343
3344         if (invalid_queue) {
3345                 /* Should never get here */
3346                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3347                         hw_queue_id);
3348                 return;
3349         }
3350
3351         db_value = pi;
3352
3353         /* ring the doorbell */
3354         WREG32(db_reg_offset, db_value);
3355
3356         if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
3357                 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3358                                 GAUDI_EVENT_PI_UPDATE);
3359 }
3360
3361 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
3362                                 struct hl_bd *bd)
3363 {
3364         __le64 *pbd = (__le64 *) bd;
3365
3366         /* The QMANs are in host memory, so a simple copy suffices */
3367         pqe[0] = pbd[0];
3368         pqe[1] = pbd[1];
3369 }
3370
3371 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3372                                         dma_addr_t *dma_handle, gfp_t flags)
3373 {
3374         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3375                                                 dma_handle, flags);
3376
3377         /* Shift to the device's base physical address of host memory */
3378         if (kernel_addr)
3379                 *dma_handle += HOST_PHYS_BASE;
3380
3381         return kernel_addr;
3382 }
3383
3384 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
3385                 void *cpu_addr, dma_addr_t dma_handle)
3386 {
3387         /* Undo the shift to the device's base physical address of host memory */
3388         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3389
3390         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3391 }
3392
3393 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
3394                                 u32 queue_id, dma_addr_t *dma_handle,
3395                                 u16 *queue_len)
3396 {
3397         struct gaudi_device *gaudi = hdev->asic_specific;
3398         struct gaudi_internal_qman_info *q;
3399
3400         if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
3401                         gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
3402                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3403                 return NULL;
3404         }
3405
3406         q = &gaudi->internal_qmans[queue_id];
3407         *dma_handle = q->pq_dma_addr;
3408         *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
3409
3410         return q->pq_kernel_addr;
3411 }
3412
3413 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
3414                                 u16 len, u32 timeout, long *result)
3415 {
3416         struct gaudi_device *gaudi = hdev->asic_specific;
3417
3418         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
3419                 if (result)
3420                         *result = 0;
3421                 return 0;
3422         }
3423
3424         if (!timeout)
3425                 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
3426
3427         return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
3428                                                 timeout, result);
3429 }
3430
3431 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3432 {
3433         struct packet_msg_prot *fence_pkt;
3434         dma_addr_t pkt_dma_addr;
3435         u32 fence_val, tmp, timeout_usec;
3436         dma_addr_t fence_dma_addr;
3437         u32 *fence_ptr;
3438         int rc;
3439
3440         if (hdev->pldm)
3441                 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
3442         else
3443                 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
3444
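        /* Queue test: write a known fence value to host memory through a
         * MSG_PROT packet submitted on the tested queue, then poll the fence
         * location until the value lands (or the timeout expires).
         */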
3445         fence_val = GAUDI_QMAN0_FENCE_VAL;
3446
3447         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3448                                                         &fence_dma_addr);
3449         if (!fence_ptr) {
3450                 dev_err(hdev->dev,
3451                         "Failed to allocate memory for H/W queue %d testing\n",
3452                         hw_queue_id);
3453                 return -ENOMEM;
3454         }
3455
3456         *fence_ptr = 0;
3457
3458         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3459                                         sizeof(struct packet_msg_prot),
3460                                         GFP_KERNEL, &pkt_dma_addr);
3461         if (!fence_pkt) {
3462                 dev_err(hdev->dev,
3463                         "Failed to allocate packet for H/W queue %d testing\n",
3464                         hw_queue_id);
3465                 rc = -ENOMEM;
3466                 goto free_fence_ptr;
3467         }
3468
3469         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
3470         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
3471         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
3472
3473         fence_pkt->ctl = cpu_to_le32(tmp);
3474         fence_pkt->value = cpu_to_le32(fence_val);
3475         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3476
3477         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3478                                         sizeof(struct packet_msg_prot),
3479                                         pkt_dma_addr);
3480         if (rc) {
3481                 dev_err(hdev->dev,
3482                         "Failed to send fence packet to H/W queue %d\n",
3483                         hw_queue_id);
3484                 goto free_pkt;
3485         }
3486
3487         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3488                                         1000, timeout_usec, true);
3489
3490         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3491
3492         if (rc == -ETIMEDOUT) {
3493                 dev_err(hdev->dev,
3494                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3495                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3496                 rc = -EIO;
3497         }
3498
3499 free_pkt:
3500         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3501                                         pkt_dma_addr);
3502 free_fence_ptr:
3503         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3504                                         fence_dma_addr);
3505         return rc;
3506 }
3507
3508 static int gaudi_test_cpu_queue(struct hl_device *hdev)
3509 {
3510         struct gaudi_device *gaudi = hdev->asic_specific;
3511
3512         /*
3513          * Check the capability here because send_cpu_message() won't update
3514          * the result value if the capability is not set
3515          */
3516         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
3517                 return 0;
3518
3519         return hl_fw_test_cpu_queue(hdev);
3520 }
3521
3522 static int gaudi_test_queues(struct hl_device *hdev)
3523 {
3524         int i, rc, ret_val = 0;
3525
3526         for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
3527                 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
3528                         rc = gaudi_test_queue(hdev, i);
3529                         if (rc)
3530                                 ret_val = -EINVAL;
3531                 }
3532         }
3533
3534         rc = gaudi_test_cpu_queue(hdev);
3535         if (rc)
3536                 ret_val = -EINVAL;
3537
3538         return ret_val;
3539 }
3540
3541 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3542                 gfp_t mem_flags, dma_addr_t *dma_handle)
3543 {
3544         void *kernel_addr;
3545
3546         if (size > GAUDI_DMA_POOL_BLK_SIZE)
3547                 return NULL;
3548
3549         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3550
3551         /* Shift to the device's base physical address of host memory */
3552         if (kernel_addr)
3553                 *dma_handle += HOST_PHYS_BASE;
3554
3555         return kernel_addr;
3556 }
3557
3558 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
3559                         dma_addr_t dma_addr)
3560 {
3561         /* Undo the shift to the device's base physical address of host memory */
3562         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3563
3564         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3565 }
3566
3567 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
3568                                         size_t size, dma_addr_t *dma_handle)
3569 {
3570         return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3571 }
3572
3573 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
3574                                                 size_t size, void *vaddr)
3575 {
3576         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3577 }
3578
3579 static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3580                         int nents, enum dma_data_direction dir)
3581 {
3582         struct scatterlist *sg;
3583         int i;
3584
3585         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3586                 return -ENOMEM;
3587
3588         /* Shift to the device's base physical address of host memory */
3589         for_each_sg(sgl, sg, nents, i)
3590                 sg->dma_address += HOST_PHYS_BASE;
3591
3592         return 0;
3593 }
3594
3595 static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3596                         int nents, enum dma_data_direction dir)
3597 {
3598         struct scatterlist *sg;
3599         int i;
3600
3601         /* Undo the shift to the device's base physical address of host memory */
3602         for_each_sg(sgl, sg, nents, i)
3603                 sg->dma_address -= HOST_PHYS_BASE;
3604
3605         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3606 }
3607
3608 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
3609                                         struct sg_table *sgt)
3610 {
3611         struct scatterlist *sg, *sg_next_iter;
3612         u32 count, dma_desc_cnt;
3613         u64 len, len_next;
3614         dma_addr_t addr, addr_next;
3615
3616         dma_desc_cnt = 0;
3617
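        /* Walk the SG list and merge physically contiguous entries as long as
         * the combined length does not exceed DMA_MAX_TRANSFER_SIZE. Each
         * merged run will become one LIN_DMA packet in the patched CB, so the
         * returned size is the space those packets will occupy.
         */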
3618         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3619
3620                 len = sg_dma_len(sg);
3621                 addr = sg_dma_address(sg);
3622
3623                 if (len == 0)
3624                         break;
3625
3626                 while ((count + 1) < sgt->nents) {
3627                         sg_next_iter = sg_next(sg);
3628                         len_next = sg_dma_len(sg_next_iter);
3629                         addr_next = sg_dma_address(sg_next_iter);
3630
3631                         if (len_next == 0)
3632                                 break;
3633
3634                         if ((addr + len == addr_next) &&
3635                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3636                                 len += len_next;
3637                                 count++;
3638                                 sg = sg_next_iter;
3639                         } else {
3640                                 break;
3641                         }
3642                 }
3643
3644                 dma_desc_cnt++;
3645         }
3646
3647         return dma_desc_cnt * sizeof(struct packet_lin_dma);
3648 }
3649
3650 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
3651                                 struct hl_cs_parser *parser,
3652                                 struct packet_lin_dma *user_dma_pkt,
3653                                 u64 addr, enum dma_data_direction dir)
3654 {
3655         struct hl_userptr *userptr;
3656         int rc;
3657
3658         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3659                         parser->job_userptr_list, &userptr))
3660                 goto already_pinned;
3661
3662         userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3663         if (!userptr)
3664                 return -ENOMEM;
3665
3666         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3667                                 userptr);
3668         if (rc)
3669                 goto free_userptr;
3670
3671         list_add_tail(&userptr->job_node, parser->job_userptr_list);
3672
3673         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3674                                         userptr->sgt->nents, dir);
3675         if (rc) {
3676                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3677                 goto unpin_memory;
3678         }
3679
3680         userptr->dma_mapped = true;
3681         userptr->dir = dir;
3682
3683 already_pinned:
3684         parser->patched_cb_size +=
3685                         gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
3686
3687         return 0;
3688
3689 unpin_memory:
3690         hl_unpin_host_memory(hdev, userptr);
3691 free_userptr:
3692         kfree(userptr);
3693         return rc;
3694 }
3695
3696 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
3697                                 struct hl_cs_parser *parser,
3698                                 struct packet_lin_dma *user_dma_pkt,
3699                                 bool src_in_host)
3700 {
3701         enum dma_data_direction dir;
3702         bool skip_host_mem_pin = false, user_memset;
3703         u64 addr;
3704         int rc = 0;
3705
3706         user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
3707                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3708                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3709
3710         if (src_in_host) {
3711                 if (user_memset)
3712                         skip_host_mem_pin = true;
3713
3714                 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
3715                 dir = DMA_TO_DEVICE;
3716                 addr = le64_to_cpu(user_dma_pkt->src_addr);
3717         } else {
3718                 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
3719                 dir = DMA_FROM_DEVICE;
3720                 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3721                                 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3722                                 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3723         }
3724
3725         if (skip_host_mem_pin)
3726                 parser->patched_cb_size += sizeof(*user_dma_pkt);
3727         else
3728                 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3729                                                 addr, dir);
3730
3731         return rc;
3732 }
3733
3734 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3735                                 struct hl_cs_parser *parser,
3736                                 struct packet_lin_dma *user_dma_pkt)
3737 {
3738         bool src_in_host = false;
3739         u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3740                         GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3741                         GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3742
3743         dev_dbg(hdev->dev, "DMA packet details:\n");
3744         dev_dbg(hdev->dev, "source == 0x%llx\n",
3745                                 le64_to_cpu(user_dma_pkt->src_addr));
3746         dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
3747         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3748
3749         /*
3750          * Special handling for DMA with size 0. Bypass all validations
3751          * because no transactions will be done except for WR_COMP, which
3752          * is not a security issue
3753          */
3754         if (!le32_to_cpu(user_dma_pkt->tsize)) {
3755                 parser->patched_cb_size += sizeof(*user_dma_pkt);
3756                 return 0;
3757         }
3758
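        /* Jobs submitted on the first PCI DMA queue (DMA_0_0..DMA_0_3) are
         * treated as host-to-device transfers, i.e. the source address is in
         * host memory.
         */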
3759         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3760                 src_in_host = true;
3761
3762         return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
3763                                                 src_in_host);
3764 }
3765
3766 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
3767                                         struct hl_cs_parser *parser,
3768                                         struct packet_load_and_exe *user_pkt)
3769 {
3770         u32 cfg;
3771
3772         cfg = le32_to_cpu(user_pkt->cfg);
3773
3774         if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
3775                 dev_err(hdev->dev,
3776                         "User not allowed to use Load and Execute\n");
3777                 return -EPERM;
3778         }
3779
3780         parser->patched_cb_size += sizeof(struct packet_load_and_exe);
3781
3782         return 0;
3783 }
3784
3785 static int gaudi_validate_cb(struct hl_device *hdev,
3786                         struct hl_cs_parser *parser, bool is_mmu)
3787 {
3788         u32 cb_parsed_length = 0;
3789         int rc = 0;
3790
3791         parser->patched_cb_size = 0;
3792
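        /* Validation policy: privileged packets (MSG_PROT, CP_DMA, STOP,
         * WREG_BULK) are rejected, LOAD_AND_EXE is checked separately,
         * LIN_DMA is either accounted as-is (MMU on) or expanded according to
         * the pinned host memory (MMU off), and all other packets simply add
         * their size to the patched CB.
         */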
3793         /* user_cb_size is greater than 0, so the loop will always execute */
3794         while (cb_parsed_length < parser->user_cb_size) {
3795                 enum packet_id pkt_id;
3796                 u16 pkt_size;
3797                 struct gaudi_packet *user_pkt;
3798
3799                 user_pkt = (struct gaudi_packet *) (uintptr_t)
3800                         (parser->user_cb->kernel_address + cb_parsed_length);
3801
3802                 pkt_id = (enum packet_id) (
3803                                 (le64_to_cpu(user_pkt->header) &
3804                                 PACKET_HEADER_PACKET_ID_MASK) >>
3805                                         PACKET_HEADER_PACKET_ID_SHIFT);
3806
3807                 if (!validate_packet_id(pkt_id)) {
3808                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3809                         rc = -EINVAL;
3810                         break;
3811                 }
3812
3813                 pkt_size = gaudi_packet_sizes[pkt_id];
3814                 cb_parsed_length += pkt_size;
3815                 if (cb_parsed_length > parser->user_cb_size) {
3816                         dev_err(hdev->dev,
3817                                 "packet 0x%x is out of CB boundary\n", pkt_id);
3818                         rc = -EINVAL;
3819                         break;
3820                 }
3821
3822                 switch (pkt_id) {
3823                 case PACKET_MSG_PROT:
3824                         dev_err(hdev->dev,
3825                                 "User not allowed to use MSG_PROT\n");
3826                         rc = -EPERM;
3827                         break;
3828
3829                 case PACKET_CP_DMA:
3830                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3831                         rc = -EPERM;
3832                         break;
3833
3834                 case PACKET_STOP:
3835                         dev_err(hdev->dev, "User not allowed to use STOP\n");
3836                         rc = -EPERM;
3837                         break;
3838
3839                 case PACKET_WREG_BULK:
3840                         dev_err(hdev->dev,
3841                                 "User not allowed to use WREG_BULK\n");
3842                         rc = -EPERM;
3843                         break;
3844
3845                 case PACKET_LOAD_AND_EXE:
3846                         rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
3847                                 (struct packet_load_and_exe *) user_pkt);
3848                         break;
3849
3850                 case PACKET_LIN_DMA:
3851                         parser->contains_dma_pkt = true;
3852                         if (is_mmu)
3853                                 parser->patched_cb_size += pkt_size;
3854                         else
3855                                 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
3856                                         (struct packet_lin_dma *) user_pkt);
3857                         break;
3858
3859                 case PACKET_WREG_32:
3860                 case PACKET_MSG_LONG:
3861                 case PACKET_MSG_SHORT:
3862                 case PACKET_REPEAT:
3863                 case PACKET_FENCE:
3864                 case PACKET_NOP:
3865                 case PACKET_ARB_POINT:
3866                         parser->patched_cb_size += pkt_size;
3867                         break;
3868
3869                 default:
3870                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3871                                 pkt_id);
3872                         rc = -EINVAL;
3873                         break;
3874                 }
3875
3876                 if (rc)
3877                         break;
3878         }
3879
3880         /*
3881          * The new CB should have space at the end for two MSG_PROT packets:
3882          * 1. A packet that will act as a completion packet
3883          * 2. A packet that will generate an MSI-X interrupt
3884          */
3885         parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3886
3887         return rc;
3888 }
3889
3890 static int gaudi_patch_dma_packet(struct hl_device *hdev,
3891                                 struct hl_cs_parser *parser,
3892                                 struct packet_lin_dma *user_dma_pkt,
3893                                 struct packet_lin_dma *new_dma_pkt,
3894                                 u32 *new_dma_pkt_size)
3895 {
3896         struct hl_userptr *userptr;
3897         struct scatterlist *sg, *sg_next_iter;
3898         u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
3899         u64 len, len_next;
3900         dma_addr_t dma_addr, dma_addr_next;
3901         u64 device_memory_addr, addr;
3902         enum dma_data_direction dir;
3903         struct sg_table *sgt;
3904         bool src_in_host = false;
3905         bool skip_host_mem_pin = false;
3906         bool user_memset;
3907
3908         ctl = le32_to_cpu(user_dma_pkt->ctl);
3909
3910         if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3911                 src_in_host = true;
3912
3913         user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3914                         GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3915
3916         if (src_in_host) {
3917                 addr = le64_to_cpu(user_dma_pkt->src_addr);
3918                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3919                 dir = DMA_TO_DEVICE;
3920                 if (user_memset)
3921                         skip_host_mem_pin = true;
3922         } else {
3923                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3924                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3925                 dir = DMA_FROM_DEVICE;
3926         }
3927
3928         if ((!skip_host_mem_pin) &&
3929                 (!hl_userptr_is_pinned(hdev, addr,
3930                                         le32_to_cpu(user_dma_pkt->tsize),
3931                                         parser->job_userptr_list, &userptr))) {
3932                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3933                                 addr, user_dma_pkt->tsize);
3934                 return -EFAULT;
3935         }
3936
3937         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3938                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3939                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
3940                 return 0;
3941         }
3942
3943         user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3944
3945         sgt = userptr->sgt;
3946         dma_desc_cnt = 0;
3947
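        /* Expand the user's single LIN_DMA packet into one packet per merged
         * SG run. The engine-barrier bit is cleared on all but the first
         * descriptor, and WR_COMP is cleared on all of them; the user's
         * WR_COMP setting is restored on the last descriptor after the loop
         * so completion is signalled only once, as the user requested.
         */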
3948         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3949                 len = sg_dma_len(sg);
3950                 dma_addr = sg_dma_address(sg);
3951
3952                 if (len == 0)
3953                         break;
3954
3955                 while ((count + 1) < sgt->nents) {
3956                         sg_next_iter = sg_next(sg);
3957                         len_next = sg_dma_len(sg_next_iter);
3958                         dma_addr_next = sg_dma_address(sg_next_iter);
3959
3960                         if (len_next == 0)
3961                                 break;
3962
3963                         if ((dma_addr + len == dma_addr_next) &&
3964                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3965                                 len += len_next;
3966                                 count++;
3967                                 sg = sg_next_iter;
3968                         } else {
3969                                 break;
3970                         }
3971                 }
3972
3973                 ctl = le32_to_cpu(user_dma_pkt->ctl);
3974                 if (likely(dma_desc_cnt))
3975                         ctl &= ~GAUDI_PKT_CTL_EB_MASK;
3976                 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3977                 new_dma_pkt->ctl = cpu_to_le32(ctl);
3978                 new_dma_pkt->tsize = cpu_to_le32(len);
3979
3980                 if (dir == DMA_TO_DEVICE) {
3981                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3982                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3983                 } else {
3984                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3985                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3986                 }
3987
3988                 if (!user_memset)
3989                         device_memory_addr += len;
3990                 dma_desc_cnt++;
3991                 new_dma_pkt++;
3992         }
3993
3994         if (!dma_desc_cnt) {
3995                 dev_err(hdev->dev,
3996                         "Error of 0 SG entries when patching DMA packet\n");
3997                 return -EFAULT;
3998         }
3999
4000         /* Fix the last dma packet - wrcomp must be as user set it */
4001         new_dma_pkt--;
4002         new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
4003
4004         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
4005
4006         return 0;
4007 }
4008
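/*
 * Walk the user CB packet by packet and build the patched CB: LIN_DMA
 * packets are rewritten by gaudi_patch_dma_packet(), privileged packets
 * (MSG_PROT, CP_DMA, STOP) are rejected with -EPERM, and all other valid
 * packet types are copied as-is into the patched CB.
 */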
4009 static int gaudi_patch_cb(struct hl_device *hdev,
4010                                 struct hl_cs_parser *parser)
4011 {
4012         u32 cb_parsed_length = 0;
4013         u32 cb_patched_cur_length = 0;
4014         int rc = 0;
4015
4016         /* user_cb_size is more than 0 so the loop will always be executed */
4017         while (cb_parsed_length < parser->user_cb_size) {
4018                 enum packet_id pkt_id;
4019                 u16 pkt_size;
4020                 u32 new_pkt_size = 0;
4021                 struct gaudi_packet *user_pkt, *kernel_pkt;
4022
4023                 user_pkt = (struct gaudi_packet *) (uintptr_t)
4024                         (parser->user_cb->kernel_address + cb_parsed_length);
4025                 kernel_pkt = (struct gaudi_packet *) (uintptr_t)
4026                         (parser->patched_cb->kernel_address +
4027                                         cb_patched_cur_length);
4028
4029                 pkt_id = (enum packet_id) (
4030                                 (le64_to_cpu(user_pkt->header) &
4031                                 PACKET_HEADER_PACKET_ID_MASK) >>
4032                                         PACKET_HEADER_PACKET_ID_SHIFT);
4033
4034                 if (!validate_packet_id(pkt_id)) {
4035                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
4036                         rc = -EINVAL;
4037                         break;
4038                 }
4039
4040                 pkt_size = gaudi_packet_sizes[pkt_id];
4041                 cb_parsed_length += pkt_size;
4042                 if (cb_parsed_length > parser->user_cb_size) {
4043                         dev_err(hdev->dev,
4044                                 "packet 0x%x is out of CB boundary\n", pkt_id);
4045                         rc = -EINVAL;
4046                         break;
4047                 }
4048
4049                 switch (pkt_id) {
4050                 case PACKET_LIN_DMA:
4051                         rc = gaudi_patch_dma_packet(hdev, parser,
4052                                         (struct packet_lin_dma *) user_pkt,
4053                                         (struct packet_lin_dma *) kernel_pkt,
4054                                         &new_pkt_size);
4055                         cb_patched_cur_length += new_pkt_size;
4056                         break;
4057
4058                 case PACKET_MSG_PROT:
4059                         dev_err(hdev->dev,
4060                                 "User not allowed to use MSG_PROT\n");
4061                         rc = -EPERM;
4062                         break;
4063
4064                 case PACKET_CP_DMA:
4065                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4066                         rc = -EPERM;
4067                         break;
4068
4069                 case PACKET_STOP:
4070                         dev_err(hdev->dev, "User not allowed to use STOP\n");
4071                         rc = -EPERM;
4072                         break;
4073
4074                 case PACKET_WREG_32:
4075                 case PACKET_WREG_BULK:
4076                 case PACKET_MSG_LONG:
4077                 case PACKET_MSG_SHORT:
4078                 case PACKET_REPEAT:
4079                 case PACKET_FENCE:
4080                 case PACKET_NOP:
4081                 case PACKET_ARB_POINT:
4082                 case PACKET_LOAD_AND_EXE:
4083                         memcpy(kernel_pkt, user_pkt, pkt_size);
4084                         cb_patched_cur_length += pkt_size;
4085                         break;
4086
4087                 default:
4088                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4089                                 pkt_id);
4090                         rc = -EINVAL;
4091                         break;
4092                 }
4093
4094                 if (rc)
4095                         break;
4096         }
4097
4098         return rc;
4099 }
4100
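/*
 * Parse a user CB when the device MMU is enabled: allocate a patched CB
 * with room for the two trailing MSG_PROT packets, copy the user CB into
 * it verbatim and validate the copy with gaudi_validate_cb(). The size
 * computed during validation must match the size allocated up front.
 */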
4101 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
4102                 struct hl_cs_parser *parser)
4103 {
4104         u64 patched_cb_handle;
4105         u32 patched_cb_size;
4106         struct hl_cb *user_cb;
4107         int rc;
4108
4109         /*
4110          * The new CB should have space at the end for two MSG_PROT packets:
4111          * 1. A packet that will act as a completion packet
4112          * 2. A packet that will generate an MSI interrupt
4113          */
4114         parser->patched_cb_size = parser->user_cb_size +
4115                         sizeof(struct packet_msg_prot) * 2;
4116
4117         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4118                                 parser->patched_cb_size, false,
4119                                 &patched_cb_handle);
4120
4121         if (rc) {
4122                 dev_err(hdev->dev,
4123                         "Failed to allocate patched CB for DMA CS %d\n",
4124                         rc);
4125                 return rc;
4126         }
4127
4128         patched_cb_handle >>= PAGE_SHIFT;
4129         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4130                                 (u32) patched_cb_handle);
4131         /* hl_cb_get should never fail here so use kernel WARN */
4132         WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4133                         (u32) patched_cb_handle);
4134         if (!parser->patched_cb) {
4135                 rc = -EFAULT;
4136                 goto out;
4137         }
4138
4139         /*
4140          * The check that parser->user_cb_size <= parser->user_cb->size was done
4141          * in validate_queue_index().
4142          */
4143         memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
4144                 (void *) (uintptr_t) parser->user_cb->kernel_address,
4145                 parser->user_cb_size);
4146
4147         patched_cb_size = parser->patched_cb_size;
4148
4149         /* Validate patched CB instead of user CB */
4150         user_cb = parser->user_cb;
4151         parser->user_cb = parser->patched_cb;
4152         rc = gaudi_validate_cb(hdev, parser, true);
4153         parser->user_cb = user_cb;
4154
4155         if (rc) {
4156                 hl_cb_put(parser->patched_cb);
4157                 goto out;
4158         }
4159
4160         if (patched_cb_size != parser->patched_cb_size) {
4161                 dev_err(hdev->dev, "user CB size mismatch\n");
4162                 hl_cb_put(parser->patched_cb);
4163                 rc = -EINVAL;
4164                 goto out;
4165         }
4166
4167 out:
4168         /*
4169          * Always call cb destroy here because we still hold one reference
4170          * to it from the earlier cb_get. After the job completes, cb_put
4171          * will release it, but here we want to remove it from the
4172          * idr.
4173          */
4174         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4175                                         patched_cb_handle << PAGE_SHIFT);
4176
4177         return rc;
4178 }
4179
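/*
 * Parse a user CB when the device MMU is disabled: validate the user CB
 * to compute the required patched CB size, allocate the patched CB and
 * fill it via gaudi_patch_cb(), which rewrites host addresses based on
 * the pinned userptr mappings. On failure the job's userptr list is
 * released here.
 */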
4180 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
4181                 struct hl_cs_parser *parser)
4182 {
4183         u64 patched_cb_handle;
4184         int rc;
4185
4186         rc = gaudi_validate_cb(hdev, parser, false);
4187
4188         if (rc)
4189                 goto free_userptr;
4190
4191         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4192                                 parser->patched_cb_size, false,
4193                                 &patched_cb_handle);
4194         if (rc) {
4195                 dev_err(hdev->dev,
4196                         "Failed to allocate patched CB for DMA CS %d\n", rc);
4197                 goto free_userptr;
4198         }
4199
4200         patched_cb_handle >>= PAGE_SHIFT;
4201         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4202                                 (u32) patched_cb_handle);
4203         /* hl_cb_get should never fail here so use kernel WARN */
4204         WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4205                         (u32) patched_cb_handle);
4206         if (!parser->patched_cb) {
4207                 rc = -EFAULT;
4208                 goto out;
4209         }
4210
4211         rc = gaudi_patch_cb(hdev, parser);
4212
4213         if (rc)
4214                 hl_cb_put(parser->patched_cb);
4215
4216 out:
4217         /*
4218          * Always call cb destroy here because we still hold one reference
4219          * to it from the earlier cb_get. After the job completes, cb_put
4220          * will release it, but here we want to remove it from the
4221          * idr.
4222          */
4223         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4224                                 patched_cb_handle << PAGE_SHIFT);
4225
4226 free_userptr:
4227         if (rc)
4228                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
4229         return rc;
4230 }
4231
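/*
 * CBs for internal queues are not copied or patched; only verify that
 * the CB address range falls entirely inside the user SRAM area, the
 * user DRAM area or the PMMU virtual address range.
 */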
4232 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
4233                                         struct hl_cs_parser *parser)
4234 {
4235         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4236
4237         /* For internal queue jobs just check if CB address is valid */
4238         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4239                                         parser->user_cb_size,
4240                                         asic_prop->sram_user_base_address,
4241                                         asic_prop->sram_end_address))
4242                 return 0;
4243
4244         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4245                                         parser->user_cb_size,
4246                                         asic_prop->dram_user_base_address,
4247                                         asic_prop->dram_end_address))
4248                 return 0;
4249
4250         /* PMMU and HPMMU addresses are equal, check only one of them */
4251         if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4252                                         parser->user_cb_size,
4253                                         asic_prop->pmmu.start_addr,
4254                                         asic_prop->pmmu.end_addr))
4255                 return 0;
4256
4257         dev_err(hdev->dev,
4258                 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
4259                 parser->user_cb, parser->user_cb_size);
4260
4261         return -EFAULT;
4262 }
4263
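/*
 * Top-level CS parser entry point: internal queue jobs only get an
 * address range check, while external queue jobs are parsed according to
 * whether the MMU capability was initialized.
 */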
4264 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4265 {
4266         struct gaudi_device *gaudi = hdev->asic_specific;
4267
4268         if (parser->queue_type == QUEUE_TYPE_INT)
4269                 return gaudi_parse_cb_no_ext_queue(hdev, parser);
4270
4271         if (gaudi->hw_cap_initialized & HW_CAP_MMU)
4272                 return gaudi_parse_cb_mmu(hdev, parser);
4273         else
4274                 return gaudi_parse_cb_no_mmu(hdev, parser);
4275 }
4276
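/*
 * Fill in the two MSG_PROT packets at the end of an external queue CB:
 * the first writes cq_val to the completion queue entry at cq_addr, the
 * second writes 1 to the MSI interrupt register of msi_vec (vector 0 is
 * forced when multi-MSI mode is disabled) to raise the interrupt.
 */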
4277 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
4278                                         u64 kernel_address, u32 len,
4279                                         u64 cq_addr, u32 cq_val, u32 msi_vec,
4280                                         bool eb)
4281 {
4282         struct gaudi_device *gaudi = hdev->asic_specific;
4283         struct packet_msg_prot *cq_pkt;
4284         u32 tmp;
4285
4286         cq_pkt = (struct packet_msg_prot *) (uintptr_t)
4287                 (kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
4288
4289         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4290         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4291
4292         if (eb)
4293                 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4294
4295         cq_pkt->ctl = cpu_to_le32(tmp);
4296         cq_pkt->value = cpu_to_le32(cq_val);
4297         cq_pkt->addr = cpu_to_le64(cq_addr);
4298
4299         cq_pkt++;
4300
4301         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4302         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4303         cq_pkt->ctl = cpu_to_le32(tmp);
4304         cq_pkt->value = cpu_to_le32(1);
4305
4306         if (!gaudi->multi_msi_mode)
4307                 msi_vec = 0;
4308
4309         cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
4310 }
4311
4312 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
4313 {
4314         WREG32(mmCPU_IF_EQ_RD_OFFS, val);
4315 }
4316
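/*
 * Fill a device memory range with a 64-bit pattern by building a single
 * memset LIN_DMA packet in a kernel CB and sending it as a driver job on
 * the DMA0 queue. The DMA0 error-cause register is checked before and
 * after the transfer and cleared if the device is still initializing.
 * Callers in this file use it to scrub user SRAM on context switch and
 * to clear the MMU page tables range.
 */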
4317 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
4318                                         u32 size, u64 val)
4319 {
4320         struct packet_lin_dma *lin_dma_pkt;
4321         struct hl_cs_job *job;
4322         u32 cb_size, ctl, err_cause;
4323         struct hl_cb *cb;
4324         int rc;
4325
4326         cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
4327         if (!cb)
4328                 return -EFAULT;
4329
4330         lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;
4331         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4332         cb_size = sizeof(*lin_dma_pkt);
4333
4334         ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
4335         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
4336         ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
4337         ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4338         ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
4339
4340         lin_dma_pkt->ctl = cpu_to_le32(ctl);
4341         lin_dma_pkt->src_addr = cpu_to_le64(val);
4342         lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
4343         lin_dma_pkt->tsize = cpu_to_le32(size);
4344
4345         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4346         if (!job) {
4347                 dev_err(hdev->dev, "Failed to allocate a new job\n");
4348                 rc = -ENOMEM;
4349                 goto release_cb;
4350         }
4351
4352         /* Verify DMA is OK */
4353         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4354         if (err_cause && !hdev->init_done) {
4355                 dev_dbg(hdev->dev,
4356                         "Clearing DMA0 engine from errors (cause 0x%x)\n",
4357                         err_cause);
4358                 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4359         }
4360
4361         job->id = 0;
4362         job->user_cb = cb;
4363         job->user_cb->cs_cnt++;
4364         job->user_cb_size = cb_size;
4365         job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
4366         job->patched_cb = job->user_cb;
4367         job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
4368
4369         hl_debugfs_add_job(hdev, job);
4370
4371         rc = gaudi_send_job_on_qman0(hdev, job);
4372         hl_debugfs_remove_job(hdev, job);
4373         kfree(job);
4374         cb->cs_cnt--;
4375
4376         /* Verify DMA is OK */
4377         err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
4378         if (err_cause) {
4379                 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
4380                 rc = -EIO;
4381                 if (!hdev->init_done) {
4382                         dev_dbg(hdev->dev,
4383                                 "Clearing DMA0 engine from errors (cause 0x%x)\n",
4384                                 err_cause);
4385                         WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
4386                 }
4387         }
4388
4389 release_cb:
4390         hl_cb_put(cb);
4391         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4392
4393         return rc;
4394 }
4395
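/*
 * Reset the user-visible sync manager state: clear all SOB and monitor
 * status registers in the east-north, east-south and west-north blocks,
 * and in the west-south block clear only the objects and monitors past
 * the range reserved by the driver (GAUDI_FIRST_AVAILABLE_W_S_*).
 */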
4396 static void gaudi_restore_sm_registers(struct hl_device *hdev)
4397 {
4398         int i;
4399
4400         for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
4401                 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4402                 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4403                 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4404         }
4405
4406         for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
4407                 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4408                 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4409                 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4410         }
4411
4412         i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
4413
4414         for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
4415                 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4416
4417         i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
4418
4419         for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
4420                 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4421 }
4422
4423 static void gaudi_restore_dma_registers(struct hl_device *hdev)
4424 {
4425         u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
4426                         mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
4427         int i;
4428
4429         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4430                 u64 sob_addr = CFG_BASE +
4431                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
4432                                 (i * sob_delta);
4433                 u32 dma_offset = i * DMA_CORE_OFFSET;
4434
4435                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
4436                                 lower_32_bits(sob_addr));
4437                 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
4438                                 upper_32_bits(sob_addr));
4439                 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
4440
4441                 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
4442                  * modified by the user for SRAM reduction
4443                  */
4444                 if (i > 1)
4445                         WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
4446                                                                 0x00000001);
4447         }
4448 }
4449
4450 static void gaudi_restore_qm_registers(struct hl_device *hdev)
4451 {
4452         u32 qman_offset;
4453         int i;
4454
4455         for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4456                 qman_offset = i * DMA_QMAN_OFFSET;
4457                 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
4458         }
4459
4460         for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
4461                 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
4462                 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
4463         }
4464
4465         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
4466                 qman_offset = i * TPC_QMAN_OFFSET;
4467                 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
4468         }
4469 }
4470
4471 static void gaudi_restore_user_registers(struct hl_device *hdev)
4472 {
4473         gaudi_restore_sm_registers(hdev);
4474         gaudi_restore_dma_registers(hdev);
4475         gaudi_restore_qm_registers(hdev);
4476 }
4477
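/*
 * Per context-switch work: scrub the user SRAM area with a fixed
 * 0x77.. pattern (a much smaller range when hdev->pldm is set), program
 * the new context's ASID into the engines and restore the user-writable
 * registers to their default values.
 */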
4478 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
4479 {
4480         struct asic_fixed_properties *prop = &hdev->asic_prop;
4481         u64 addr = prop->sram_user_base_address;
4482         u32 size = hdev->pldm ? 0x10000 :
4483                         (prop->sram_size - SRAM_USER_BASE_OFFSET);
4484         u64 val = 0x7777777777777777ull;
4485         int rc;
4486
4487         rc = gaudi_memset_device_memory(hdev, addr, size, val);
4488         if (rc) {
4489                 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4490                 return rc;
4491         }
4492
4493         gaudi_mmu_prepare(hdev, asid);
4494
4495         gaudi_restore_user_registers(hdev);
4496
4497         return 0;
4498 }
4499
4500 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
4501 {
4502         struct asic_fixed_properties *prop = &hdev->asic_prop;
4503         struct gaudi_device *gaudi = hdev->asic_specific;
4504         u64 addr = prop->mmu_pgt_addr;
4505         u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
4506
4507         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4508                 return 0;
4509
4510         return gaudi_memset_device_memory(hdev, addr, size, 0);
4511 }
4512
4513 static void gaudi_restore_phase_topology(struct hl_device *hdev)
4514 {
4515
4516 }
4517
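/*
 * debugfs access helpers (32/64-bit read and write): route the address
 * to the matching window - CFG space via RREG32/WREG32 (refused while
 * clock gating is enabled for debugfs), SRAM through its PCI BAR, HBM by
 * temporarily moving the HBM BAR, and host physical memory only when no
 * IOMMU is present.
 */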
4518 static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4519 {
4520         struct asic_fixed_properties *prop = &hdev->asic_prop;
4521         struct gaudi_device *gaudi = hdev->asic_specific;
4522         u64 hbm_bar_addr;
4523         int rc = 0;
4524
4525         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4526
4527                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4528                                 (hdev->clock_gating_mask &
4529                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4530
4531                         dev_err_ratelimited(hdev->dev,
4532                                 "Can't read register - clock gating is enabled!\n");
4533                         rc = -EFAULT;
4534                 } else {
4535                         *val = RREG32(addr - CFG_BASE);
4536                 }
4537
4538         } else if ((addr >= SRAM_BASE_ADDR) &&
4539                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4540                 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
4541                                 (addr - SRAM_BASE_ADDR));
4542         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4543                 u64 bar_base_addr = DRAM_PHYS_BASE +
4544                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4545
4546                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4547                 if (hbm_bar_addr != U64_MAX) {
4548                         *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
4549                                                 (addr - bar_base_addr));
4550
4551                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4552                                                 hbm_bar_addr);
4553                 }
4554                 if (hbm_bar_addr == U64_MAX)
4555                         rc = -EIO;
4556         } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4557                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4558         } else {
4559                 rc = -EFAULT;
4560         }
4561
4562         return rc;
4563 }
4564
4565 static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4566 {
4567         struct asic_fixed_properties *prop = &hdev->asic_prop;
4568         struct gaudi_device *gaudi = hdev->asic_specific;
4569         u64 hbm_bar_addr;
4570         int rc = 0;
4571
4572         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4573
4574                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4575                                 (hdev->clock_gating_mask &
4576                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4577
4578                         dev_err_ratelimited(hdev->dev,
4579                                 "Can't write register - clock gating is enabled!\n");
4580                         rc = -EFAULT;
4581                 } else {
4582                         WREG32(addr - CFG_BASE, val);
4583                 }
4584
4585         } else if ((addr >= SRAM_BASE_ADDR) &&
4586                         (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4587                 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
4588                                         (addr - SRAM_BASE_ADDR));
4589         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4590                 u64 bar_base_addr = DRAM_PHYS_BASE +
4591                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4592
4593                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4594                 if (hbm_bar_addr != U64_MAX) {
4595                         writel(val, hdev->pcie_bar[HBM_BAR_ID] +
4596                                                 (addr - bar_base_addr));
4597
4598                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4599                                                 hbm_bar_addr);
4600                 }
4601                 if (hbm_bar_addr == U64_MAX)
4602                         rc = -EIO;
4603         } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4604                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4605         } else {
4606                 rc = -EFAULT;
4607         }
4608
4609         return rc;
4610 }
4611
4612 static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4613 {
4614         struct asic_fixed_properties *prop = &hdev->asic_prop;
4615         struct gaudi_device *gaudi = hdev->asic_specific;
4616         u64 hbm_bar_addr;
4617         int rc = 0;
4618
4619         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4620
4621                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4622                                 (hdev->clock_gating_mask &
4623                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4624
4625                         dev_err_ratelimited(hdev->dev,
4626                                 "Can't read register - clock gating is enabled!\n");
4627                         rc = -EFAULT;
4628                 } else {
4629                         u32 val_l = RREG32(addr - CFG_BASE);
4630                         u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4631
4632                         *val = (((u64) val_h) << 32) | val_l;
4633                 }
4634
4635         } else if ((addr >= SRAM_BASE_ADDR) &&
4636                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4637                 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
4638                                 (addr - SRAM_BASE_ADDR));
4639         } else if (addr <=
4640                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4641                 u64 bar_base_addr = DRAM_PHYS_BASE +
4642                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4643
4644                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4645                 if (hbm_bar_addr != U64_MAX) {
4646                         *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
4647                                                 (addr - bar_base_addr));
4648
4649                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4650                                                 hbm_bar_addr);
4651                 }
4652                 if (hbm_bar_addr == U64_MAX)
4653                         rc = -EIO;
4654         } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4655                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4656         } else {
4657                 rc = -EFAULT;
4658         }
4659
4660         return rc;
4661 }
4662
4663 static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4664 {
4665         struct asic_fixed_properties *prop = &hdev->asic_prop;
4666         struct gaudi_device *gaudi = hdev->asic_specific;
4667         u64 hbm_bar_addr;
4668         int rc = 0;
4669
4670         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4671
4672                 if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) &&
4673                                 (hdev->clock_gating_mask &
4674                                                 GAUDI_CLK_GATE_DEBUGFS_MASK)) {
4675
4676                         dev_err_ratelimited(hdev->dev,
4677                                 "Can't write register - clock gating is enabled!\n");
4678                         rc = -EFAULT;
4679                 } else {
4680                         WREG32(addr - CFG_BASE, lower_32_bits(val));
4681                         WREG32(addr + sizeof(u32) - CFG_BASE,
4682                                 upper_32_bits(val));
4683                 }
4684
4685         } else if ((addr >= SRAM_BASE_ADDR) &&
4686                    (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4687                 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
4688                                         (addr - SRAM_BASE_ADDR));
4689         } else if (addr <=
4690                     DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4691                 u64 bar_base_addr = DRAM_PHYS_BASE +
4692                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4693
4694                 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4695                 if (hbm_bar_addr != U64_MAX) {
4696                         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4697                                                 (addr - bar_base_addr));
4698
4699                         hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4700                                                 hbm_bar_addr);
4701                 }
4702                 if (hbm_bar_addr == U64_MAX)
4703                         rc = -EIO;
4704         } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4705                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4706         } else {
4707                 rc = -EFAULT;
4708         }
4709
4710         return rc;
4711 }
4712
4713 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
4714 {
4715         struct gaudi_device *gaudi = hdev->asic_specific;
4716
4717         if (hdev->hard_reset_pending)
4718                 return U64_MAX;
4719
4720         return readq(hdev->pcie_bar[HBM_BAR_ID] +
4721                         (addr - gaudi->hbm_bar_cur_addr));
4722 }
4723
4724 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4725 {
4726         struct gaudi_device *gaudi = hdev->asic_specific;
4727
4728         if (hdev->hard_reset_pending)
4729                 return;
4730
4731         writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4732                         (addr - gaudi->hbm_bar_cur_addr));
4733 }
4734
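/*
 * Program a single engine ASID/user register: bits [10:0] hold the MMBP
 * (MMU bypass) and ASID fields, so they are cleared first and the ASID
 * is then OR'ed in. For example, asid 3 leaves MMBP at 0 (translation
 * enabled) and ASID at 3.
 */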
4735 static void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
4736 {
4737         /* mask to zero the MMBP and ASID bits */
4738         WREG32_AND(reg, ~0x7FF);
4739         WREG32_OR(reg, asid);
4740 }
4741
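/*
 * Propagate the context ASID to every DMA, TPC and MME engine and to the
 * PSOC trace unit by rewriting their non-secure properties and AXI user
 * registers. Clock gating is disabled around the register writes and
 * restored afterwards.
 */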
4742 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
4743 {
4744         struct gaudi_device *gaudi = hdev->asic_specific;
4745
4746         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4747                 return;
4748
4749         if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
4750                 WARN(1, "asid %u is too big\n", asid);
4751                 return;
4752         }
4753
4754         mutex_lock(&gaudi->clk_gate_mutex);
4755
4756         hdev->asic_funcs->disable_clock_gating(hdev);
4757
4758         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4759         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4760         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4761         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4762         gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4763
4764         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4765         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4766         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4767         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4768         gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4769
4770         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4771         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4772         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4773         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4774         gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4775
4776         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4777         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4778         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4779         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4780         gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4781
4782         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4783         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4784         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4785         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4786         gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4787
4788         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4789         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4790         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4791         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4792         gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4793
4794         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4795         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4796         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4797         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4798         gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4799
4800         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4801         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4802         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4803         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4804         gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4805
4806         gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
4807         gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
4808         gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
4809         gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
4810         gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
4811         gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
4812         gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
4813         gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
4814
4815         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4816         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4817         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4818         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4819         gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4820         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
4821         gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
4822
4823         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4824         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4825         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4826         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4827         gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4828         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
4829         gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
4830
4831         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4832         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4833         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4834         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4835         gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4836         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
4837         gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
4838
4839         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4840         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4841         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4842         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4843         gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4844         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
4845         gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
4846
4847         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4848         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4849         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4850         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4851         gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4852         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
4853         gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
4854
4855         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4856         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4857         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4858         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4859         gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4860         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
4861         gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
4862
4863         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4864         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4865         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4866         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4867         gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4868         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
4869         gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
4870
4871         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4872         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4873         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4874         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4875         gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4876         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
4877         gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
4878
4879         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4880         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4881         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4882         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4883         gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4884         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4885         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4886         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4887         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4888         gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4889
4890         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
4891         gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
4892         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
4893         gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
4894         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
4895         gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
4896         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
4897         gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
4898         gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
4899         gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
4900         gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
4901         gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
4902
4903         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
4904         gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
4905
4906         hdev->asic_funcs->set_clock_gating(hdev);
4907
4908         mutex_unlock(&gaudi->clk_gate_mutex);
4909 }
4910
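/*
 * Send a driver-internal job on the DMA0 QMAN: the device must be idle,
 * a fence MSG_PROT packet at the end of the patched CB lets the driver
 * poll host memory for GAUDI_QMAN0_FENCE_VAL, and the PROT_VAL bit of
 * the PCI DMA core is set for the duration of the job and cleared
 * afterwards.
 */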
4911 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
4912                 struct hl_cs_job *job)
4913 {
4914         struct packet_msg_prot *fence_pkt;
4915         u32 *fence_ptr;
4916         dma_addr_t fence_dma_addr;
4917         struct hl_cb *cb;
4918         u32 tmp, timeout, dma_offset;
4919         int rc;
4920
4921         if (hdev->pldm)
4922                 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
4923         else
4924                 timeout = HL_DEVICE_TIMEOUT_USEC;
4925
4926         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
4927                 dev_err_ratelimited(hdev->dev,
4928                         "Can't send driver job on QMAN0 because the device is not idle\n");
4929                 return -EBUSY;
4930         }
4931
4932         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4933                                                         &fence_dma_addr);
4934         if (!fence_ptr) {
4935                 dev_err(hdev->dev,
4936                         "Failed to allocate fence memory for QMAN0\n");
4937                 return -ENOMEM;
4938         }
4939
4940         cb = job->patched_cb;
4941
4942         fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
4943                         job->job_cb_size - sizeof(struct packet_msg_prot));
4944
4945         tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4946         tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4947         tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4948
4949         fence_pkt->ctl = cpu_to_le32(tmp);
4950         fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
4951         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4952
4953         dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
4954
4955         WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
4956
4957         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
4958                                         job->job_cb_size, cb->bus_address);
4959         if (rc) {
4960                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
4961                 goto free_fence_ptr;
4962         }
4963
4964         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
4965                                 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
4966                                 timeout, true);
4967
4968         hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
4969
4970         if (rc == -ETIMEDOUT) {
4971                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
4972                 goto free_fence_ptr;
4973         }
4974
4975 free_fence_ptr:
4976         WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
4977                         ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
4978
4979         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4980                                         fence_dma_addr);
4981         return rc;
4982 }
4983
4984 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
4985 {
4986         if (event_type >= GAUDI_EVENT_SIZE)
4987                 goto event_not_supported;
4988
4989         if (!gaudi_irq_map_table[event_type].valid)
4990                 goto event_not_supported;
4991
4992         snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
4993
4994         return;
4995
4996 event_not_supported:
4997         snprintf(desc, size, "N/A");
4998 }
4999
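/*
 * Two DMA engines sit behind each DMA_IF initiator coordinate, so the
 * RAZWI initiator ID alone is ambiguous. Read both engines' ERR_CAUSE
 * registers and name the one whose HBW read/write error bit is set; if
 * neither or both are set, report both candidates.
 */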
5000 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
5001                                                         u32 x_y, bool is_write)
5002 {
5003         u32 dma_id[2], dma_offset, err_cause[2], mask, i;
5004
5005         mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
5006                                 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
5007
5008         switch (x_y) {
5009         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5010         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5011                 dma_id[0] = 0;
5012                 dma_id[1] = 2;
5013                 break;
5014         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5015         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5016                 dma_id[0] = 1;
5017                 dma_id[1] = 3;
5018                 break;
5019         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5020         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5021                 dma_id[0] = 4;
5022                 dma_id[1] = 6;
5023                 break;
5024         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5025         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5026                 dma_id[0] = 5;
5027                 dma_id[1] = 7;
5028                 break;
5029         default:
5030                 goto unknown_initiator;
5031         }
5032
5033         for (i = 0 ; i < 2 ; i++) {
5034                 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
5035                 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5036         }
5037
5038         switch (x_y) {
5039         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5040         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5041                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5042                         return "DMA0";
5043                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5044                         return "DMA2";
5045                 else
5046                         return "DMA0 or DMA2";
5047         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5048         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5049                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5050                         return "DMA1";
5051                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5052                         return "DMA3";
5053                 else
5054                         return "DMA1 or DMA3";
5055         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5056         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5057                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5058                         return "DMA4";
5059                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5060                         return "DMA6";
5061                 else
5062                         return "DMA4 or DMA6";
5063         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5064         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5065                 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5066                         return "DMA5";
5067                 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5068                         return "DMA7";
5069                 else
5070                         return "DMA5 or DMA7";
5071         }
5072
5073 unknown_initiator:
5074         return "unknown initiator";
5075 }
5076
5077 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
5078                                                         bool is_write)
5079 {
5080         u32 val, x_y, axi_id;
5081
5082         val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
5083                                 RREG32(mmMMU_UP_RAZWI_READ_ID);
5084         x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
5085                         (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
5086         axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
5087                         RAZWI_INITIATOR_AXI_ID_SHIFT);
5088
5089         switch (x_y) {
5090         case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
5091                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5092                         return "TPC0";
5093                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5094                         return "NIC0";
5095                 break;
5096         case RAZWI_INITIATOR_ID_X_Y_TPC1:
5097                 return "TPC1";
5098         case RAZWI_INITIATOR_ID_X_Y_MME0_0:
5099         case RAZWI_INITIATOR_ID_X_Y_MME0_1:
5100                 return "MME0";
5101         case RAZWI_INITIATOR_ID_X_Y_MME1_0:
5102         case RAZWI_INITIATOR_ID_X_Y_MME1_1:
5103                 return "MME1";
5104         case RAZWI_INITIATOR_ID_X_Y_TPC2:
5105                 return "TPC2";
5106         case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
5107                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5108                         return "TPC3";
5109                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
5110                         return "PCI";
5111                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
5112                         return "CPU";
5113                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
5114                         return "PSOC";
5115                 break;
5116         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5117         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5118         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5119         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5120         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5121         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5122         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5123         case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5124                 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
5125         case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
5126                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5127                         return "TPC4";
5128                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5129                         return "NIC1";
5130                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5131                         return "NIC2";
5132                 break;
5133         case RAZWI_INITIATOR_ID_X_Y_TPC5:
5134                 return "TPC5";
5135         case RAZWI_INITIATOR_ID_X_Y_MME2_0:
5136         case RAZWI_INITIATOR_ID_X_Y_MME2_1:
5137                 return "MME2";
5138         case RAZWI_INITIATOR_ID_X_Y_MME3_0:
5139         case RAZWI_INITIATOR_ID_X_Y_MME3_1:
5140                 return "MME3";
5141         case RAZWI_INITIATOR_ID_X_Y_TPC6:
5142                 return "TPC6";
5143         case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
5144                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5145                         return "TPC7";
5146                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5147                         return "NIC4";
5148                 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5149                         return "NIC5";
5150                 break;
5151         default:
5152                 break;
5153         }
5154
5155         dev_err(hdev->dev,
5156                 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
5157                 val,
5158                 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
5159                 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
5160                 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
5161                         RAZWI_INITIATOR_AXI_ID_MASK);
5162
5163         return "unknown initiator";
5164 }
5165
5166 static void gaudi_print_razwi_info(struct hl_device *hdev)
5167 {
5168         if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
5169                 dev_err_ratelimited(hdev->dev,
5170                         "RAZWI event caused by illegal write of %s\n",
5171                         gaudi_get_razwi_initiator_name(hdev, true));
5172                 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
5173         }
5174
5175         if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
5176                 dev_err_ratelimited(hdev->dev,
5177                         "RAZWI event caused by illegal read of %s\n",
5178                         gaudi_get_razwi_initiator_name(hdev, false));
5179                 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
5180         }
5181 }
5182
5183 static void gaudi_print_mmu_error_info(struct hl_device *hdev)
5184 {
5185         struct gaudi_device *gaudi = hdev->asic_specific;
5186         u64 addr;
5187         u32 val;
5188
5189         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5190                 return;
5191
5192         val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
5193         if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5194                 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
5195                 addr <<= 32;
5196                 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
5197
5198                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
5199                                         addr);
5200
5201                 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
5202         }
5203
5204         val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
5205         if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5206                 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
5207                 addr <<= 32;
5208                 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
5209
5210                 dev_err_ratelimited(hdev->dev,
5211                                 "MMU access error on va 0x%llx\n", addr);
5212
5213                 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
5214         }
5215 }
5216
5217 /*
5218  *  +-------------------+------------------------------------------------------+
5219  *  | Configuration Reg |                     Description                      |
5220  *  |      Address      |                                                      |
5221  *  +-------------------+------------------------------------------------------+
5222  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
5223  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
5224  *  |                   |0xF34 memory wrappers 63:32                           |
5225  *  |                   |0xF38 memory wrappers 95:64                           |
5226  *  |                   |0xF3C memory wrappers 127:96                          |
5227  *  +-------------------+------------------------------------------------------+
5228  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
5229  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
5230  *  |                   |0xF44 memory wrappers 63:32                           |
5231  *  |                   |0xF48 memory wrappers 95:64                           |
5232  *  |                   |0xF4C memory wrappers 127:96                          |
5233  *  +-------------------+------------------------------------------------------+
5234  */
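/*
 * Worked example of the layout above: a double error on memory wrapper 70
 * sets bit 6 of the register at offset 0xF48 (wrappers 95:64). The
 * extraction loop below reads it from the third DERR register (i = 2) and
 * recovers the wrapper index as __ffs(err_word) + 32 * i = 6 + 64 = 70.
 */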
5235 static int gaudi_extract_ecc_info(struct hl_device *hdev,
5236                 struct ecc_info_extract_params *params, u64 *ecc_address,
5237                 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
5238 {
5239         struct gaudi_device *gaudi = hdev->asic_specific;
5240         u32 i, num_mem_regs, reg, err_bit;
5241         u64 err_addr, err_word = 0;
5242         int rc = 0;
5243
5244         num_mem_regs = DIV_ROUND_UP(params->num_memories, 32);
5246
5247         if (params->block_address >= CFG_BASE)
5248                 params->block_address -= CFG_BASE;
5249
5250         if (params->derr)
5251                 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
5252         else
5253                 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
5254
5255         if (params->disable_clock_gating) {
5256                 mutex_lock(&gaudi->clk_gate_mutex);
5257                 hdev->asic_funcs->disable_clock_gating(hdev);
5258         }
5259
5260         /* Set invalid wrapper index */
5261         *memory_wrapper_idx = 0xFF;
5262
5263         /* Iterate through the memory wrappers; a single error bit must be set */
5264         for (i = 0 ; i < num_mem_regs ; i++) {
5265                 /* Error indication registers are consecutive, 4 bytes apart */
5266                 err_word = RREG32(err_addr + i * 4);
5267                 if (err_word) {
5268                         err_bit = __ffs(err_word);
5269                         *memory_wrapper_idx = err_bit + (32 * i);
5270                         break;
5271                 }
5272         }
5273
5274         if (*memory_wrapper_idx == 0xFF) {
5275                 dev_err(hdev->dev, "ECC error information cannot be found\n");
5276                 rc = -EINVAL;
5277                 goto enable_clk_gate;
5278         }
5279
5280         WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
5281                         *memory_wrapper_idx);
5282
5283         *ecc_address =
5284                 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
5285         *ecc_syndrom =
5286                 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
5287
5288         /* Clear error indication */
5289         reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
5290         if (params->derr)
5291                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
5292         else
5293                 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
5294
5295         WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
5296
5297 enable_clk_gate:
5298         if (params->disable_clock_gating) {
5299                 hdev->asic_funcs->set_clock_gating(hdev);
5300
5301                 mutex_unlock(&gaudi->clk_gate_mutex);
5302         }
5303
5304         return rc;
5305 }
5306
5307 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
5308                                           const char *qm_name,
5309                                           u64 glbl_sts_addr,
5310                                           u64 arb_err_addr)
5311 {
5312         u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
5313         char reg_desc[32];
5314
5315         /* Iterate through all stream GLBL_STS1 registers + Lower CP */
5316         for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
5317                 glbl_sts_clr_val = 0;
5318                 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
5319
5320                 if (!glbl_sts_val)
5321                         continue;
5322
5323                 if (i == QMAN_STREAMS)
5324                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
5325                 else
5326                         snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
5327
5328                 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
5329                         if (glbl_sts_val & BIT(j)) {
5330                                 dev_err_ratelimited(hdev->dev,
5331                                                 "%s %s. err cause: %s\n",
5332                                                 qm_name, reg_desc,
5333                                                 gaudi_qman_error_cause[j]);
5334                                 glbl_sts_clr_val |= BIT(j);
5335                         }
5336                 }
5337
5338                 /* Write 1 to clear errors */
5339                 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
5340         }
5341
5342         arb_err_val = RREG32(arb_err_addr);
5343
5344         if (!arb_err_val)
5345                 return;
5346
5347         for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
5348                 if (arb_err_val & BIT(j)) {
5349                         dev_err_ratelimited(hdev->dev,
5350                                         "%s ARB_ERR. err cause: %s\n",
5351                                         qm_name,
5352                                         gaudi_qman_arb_error_cause[j]);
5353                 }
5354         }
5355 }
5356
5357 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
5358                 struct hl_eq_ecc_data *ecc_data)
5359 {
5360         struct ecc_info_extract_params params;
5361         u64 ecc_address = 0, ecc_syndrom = 0;
5362         u8 index, memory_wrapper_idx = 0;
5363         bool extract_info_from_fw;
5364         int rc;
5365
5366         switch (event_type) {
5367         case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
5368         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
5369                 extract_info_from_fw = true;
5370                 break;
5371         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5372                 index = event_type - GAUDI_EVENT_TPC0_SERR;
5373                 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5374                 params.num_memories = 90;
5375                 params.derr = false;
5376                 params.disable_clock_gating = true;
5377                 extract_info_from_fw = false;
5378                 break;
5379         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5380                 index = event_type - GAUDI_EVENT_TPC0_DERR;
5381                 params.block_address =
5382                         mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5383                 params.num_memories = 90;
5384                 params.derr = true;
5385                 params.disable_clock_gating = true;
5386                 extract_info_from_fw = false;
5387                 break;
5388         case GAUDI_EVENT_MME0_ACC_SERR:
5389         case GAUDI_EVENT_MME1_ACC_SERR:
5390         case GAUDI_EVENT_MME2_ACC_SERR:
5391         case GAUDI_EVENT_MME3_ACC_SERR:
5392                 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
5393                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5394                 params.num_memories = 128;
5395                 params.derr = false;
5396                 params.disable_clock_gating = true;
5397                 extract_info_from_fw = false;
5398                 break;
5399         case GAUDI_EVENT_MME0_ACC_DERR:
5400         case GAUDI_EVENT_MME1_ACC_DERR:
5401         case GAUDI_EVENT_MME2_ACC_DERR:
5402         case GAUDI_EVENT_MME3_ACC_DERR:
5403                 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
5404                 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5405                 params.num_memories = 128;
5406                 params.derr = true;
5407                 params.disable_clock_gating = true;
5408                 extract_info_from_fw = false;
5409                 break;
5410         case GAUDI_EVENT_MME0_SBAB_SERR:
5411         case GAUDI_EVENT_MME1_SBAB_SERR:
5412         case GAUDI_EVENT_MME2_SBAB_SERR:
5413         case GAUDI_EVENT_MME3_SBAB_SERR:
5414                 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
5415                 params.block_address =
5416                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5417                 params.num_memories = 33;
5418                 params.derr = false;
5419                 params.disable_clock_gating = true;
5420                 extract_info_from_fw = false;
5421                 break;
5422         case GAUDI_EVENT_MME0_SBAB_DERR:
5423         case GAUDI_EVENT_MME1_SBAB_DERR:
5424         case GAUDI_EVENT_MME2_SBAB_DERR:
5425         case GAUDI_EVENT_MME3_SBAB_DERR:
5426                 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
5427                 params.block_address =
5428                         mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5429                 params.num_memories = 33;
5430                 params.derr = true;
5431                 params.disable_clock_gating = true;
                     extract_info_from_fw = false;
                     break;
5432         default:
5433                 return;
5434         }
5435
5436         if (extract_info_from_fw) {
5437                 ecc_address = le64_to_cpu(ecc_data->ecc_address);
5438                 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
5439                 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
5440         } else {
5441                 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
5442                                 &ecc_syndrom, &memory_wrapper_idx);
5443                 if (rc)
5444                         return;
5445         }
5446
5447         dev_err(hdev->dev,
5448                 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
5449                 ecc_address, ecc_syndrom, memory_wrapper_idx);
5450 }
5451
5452 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
5453 {
5454         u64 glbl_sts_addr, arb_err_addr;
5455         u8 index;
5456         char desc[32];
5457
5458         switch (event_type) {
5459         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5460                 index = event_type - GAUDI_EVENT_TPC0_QM;
5461                 glbl_sts_addr =
5462                         mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
5463                 arb_err_addr =
5464                         mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
5465                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
5466                 break;
5467         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5468                 index = event_type - GAUDI_EVENT_MME0_QM;
5469                 glbl_sts_addr =
5470                         mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
5471                 arb_err_addr =
5472                         mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
5473                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
5474                 break;
5475         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5476                 index = event_type - GAUDI_EVENT_DMA0_QM;
5477                 glbl_sts_addr =
5478                         mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
5479                 arb_err_addr =
5480                         mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
5481                 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
5482                 break;
5483         default:
5484                 return;
5485         }
5486
5487         gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
5488 }
5489
5490 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
5491                                         bool razwi)
5492 {
5493         char desc[64] = "";
5494
5495         gaudi_get_event_desc(event_type, desc, sizeof(desc));
5496         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
5497                 event_type, desc);
5498
5499         if (razwi) {
5500                 gaudi_print_razwi_info(hdev);
5501                 gaudi_print_mmu_error_info(hdev);
5502         }
5503 }
5504
5505 static int gaudi_soft_reset_late_init(struct hl_device *hdev)
5506 {
5507         struct gaudi_device *gaudi = hdev->asic_specific;
5508
5509         /* Unmask all IRQs since some could have been received
5510          * during the soft reset
5511          */
5512         return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
5513 }
5514
5515 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
5516 {
5517         int ch, err = 0;
5518         u32 base, val, val2;
5519
5520         base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
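        /*
         * Registers 0x6C and 0x7C hold the interrupt status of the channel's
         * two pseudo-channels (reported as pc ch*2 and ch*2+1); the low and
         * high bytes of each are OR'd together before decoding the cause bits.
         */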
5521         for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
5522                 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
5523                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5524                 if (val) {
5525                         err = 1;
5526                         dev_err(hdev->dev,
5527                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5528                                 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
5529                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
5530                                 (val >> 4) & 0x1);
5531
5532                         val2 = RREG32(base + ch * 0x1000 + 0x060);
5533                         dev_err(hdev->dev,
5534                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5535                                 device, ch * 2,
5536                                 RREG32(base + ch * 0x1000 + 0x064),
5537                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5538                                 (val2 & 0xFF0000) >> 16,
5539                                 (val2 & 0xFF000000) >> 24);
5540                 }
5541
5542                 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
5543                 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5544                 if (val) {
5545                         err = 1;
5546                         dev_err(hdev->dev,
5547                                 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5548                                 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
5549                                 (val >> 2) & 0x1, (val >> 3) & 0x1,
5550                                 (val >> 4) & 0x1);
5551
5552                         val2 = RREG32(base + ch * 0x1000 + 0x070);
5553                         dev_err(hdev->dev,
5554                                 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5555                                 device, ch * 2 + 1,
5556                                 RREG32(base + ch * 0x1000 + 0x074),
5557                                 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5558                                 (val2 & 0xFF0000) >> 16,
5559                                 (val2 & 0xFF000000) >> 24);
5560                 }
5561
5562                 /* Clear interrupts */
5563                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
5564                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
5565                 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
5566                 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
5567                 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
5568                 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
5569         }
5570
5571         val  = RREG32(base + 0x8F30);
5572         val2 = RREG32(base + 0x8F34);
5573         if (val | val2) {
5574                 err = 1;
5575                 dev_err(hdev->dev,
5576                         "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
5577                         device, val, val2);
5578         }
5579         val  = RREG32(base + 0x8F40);
5580         val2 = RREG32(base + 0x8F44);
5581         if (val | val2) {
5582                 err = 1;
5583                 dev_err(hdev->dev,
5584                         "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
5585                         device, val, val2);
5586         }
5587
5588         return err;
5589 }
5590
5591 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
5592 {
5593         switch (hbm_event_type) {
5594         case GAUDI_EVENT_HBM0_SPI_0:
5595         case GAUDI_EVENT_HBM0_SPI_1:
5596                 return 0;
5597         case GAUDI_EVENT_HBM1_SPI_0:
5598         case GAUDI_EVENT_HBM1_SPI_1:
5599                 return 1;
5600         case GAUDI_EVENT_HBM2_SPI_0:
5601         case GAUDI_EVENT_HBM2_SPI_1:
5602                 return 2;
5603         case GAUDI_EVENT_HBM3_SPI_0:
5604         case GAUDI_EVENT_HBM3_SPI_1:
5605                 return 3;
5606         default:
5607                 break;
5608         }
5609
5610         /* Should never happen */
5611         return 0;
5612 }
5613
5614 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
5615                                         char *interrupt_name)
5616 {
5617         struct gaudi_device *gaudi = hdev->asic_specific;
5618         u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
5619         bool soft_reset_required = false;
5620
5621         /* Accessing the TPC_INTR_CAUSE registers requires disabling clock
5622          * gating, which the CPU-CP firmware cannot do, so these interrupts
5623          * are read and cleared by the driver instead.
5624          */
5625
5626         mutex_lock(&gaudi->clk_gate_mutex);
5627
5628         hdev->asic_funcs->disable_clock_gating(hdev);
5629
5630         tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
5631                                 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
5632
5633         for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
5634                 if (tpc_interrupts_cause & BIT(i)) {
5635                         dev_err_ratelimited(hdev->dev,
5636                                         "TPC%d_%s interrupt cause: %s\n",
5637                                         tpc_id, interrupt_name,
5638                                         gaudi_tpc_interrupts_cause[i]);
5639                         /* If this is QM error, we need to soft-reset */
5640                         if (i == 15)
5641                                 soft_reset_required = true;
5642                 }
5643
5644         /* Clear interrupts */
5645         WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
5646
5647         hdev->asic_funcs->set_clock_gating(hdev);
5648
5649         mutex_unlock(&gaudi->clk_gate_mutex);
5650
5651         return soft_reset_required;
5652 }
5653
5654 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
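/*
 * The TPC event IDs sit at fixed strides in the event map: DEC events appear
 * to be spaced two apart per TPC and KRN_ERR events six apart, hence the
 * divisors used below.
 */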
5655 {
5656         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
5657 }
5658
5659 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
5660 {
5661         return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
5662 }
5663
5664 static void gaudi_print_clk_change_info(struct hl_device *hdev,
5665                                         u16 event_type)
5666 {
5667         switch (event_type) {
5668         case GAUDI_EVENT_FIX_POWER_ENV_S:
5669                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_POWER;
5670                 dev_info_ratelimited(hdev->dev,
5671                         "Clock throttling due to power consumption\n");
5672                 break;
5673
5674         case GAUDI_EVENT_FIX_POWER_ENV_E:
5675                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_POWER;
5676                 dev_info_ratelimited(hdev->dev,
5677                         "Power envelop is safe, back to optimal clock\n");
5678                 break;
5679
5680         case GAUDI_EVENT_FIX_THERMAL_ENV_S:
5681                 hdev->clk_throttling_reason |= HL_CLK_THROTTLE_THERMAL;
5682                 dev_info_ratelimited(hdev->dev,
5683                         "Clock throttling due to overheating\n");
5684                 break;
5685
5686         case GAUDI_EVENT_FIX_THERMAL_ENV_E:
5687                 hdev->clk_throttling_reason &= ~HL_CLK_THROTTLE_THERMAL;
5688                 dev_info_ratelimited(hdev->dev,
5689                         "Thermal envelop is safe, back to optimal clock\n");
5690                 break;
5691
5692         default:
5693                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
5694                         event_type);
5695                 break;
5696         }
5697 }
5698
5699 static void gaudi_handle_eqe(struct hl_device *hdev,
5700                                 struct hl_eq_entry *eq_entry)
5701 {
5702         struct gaudi_device *gaudi = hdev->asic_specific;
5703         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
5704         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
5705                         >> EQ_CTL_EVENT_TYPE_SHIFT);
5706         u8 cause;
5707         bool reset_required;
5708
5709         gaudi->events_stat[event_type]++;
5710         gaudi->events_stat_aggregate[event_type]++;
5711
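        /*
         * Rough policy of the switch below: double-error (DERR) and other
         * fatal events trigger a hard reset when hard_reset_on_fw_events is
         * set, while single-error (SERR) and informational events are only
         * logged and then unmasked in the firmware so they can fire again.
         */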
5712         switch (event_type) {
5713         case GAUDI_EVENT_PCIE_CORE_DERR:
5714         case GAUDI_EVENT_PCIE_IF_DERR:
5715         case GAUDI_EVENT_PCIE_PHY_DERR:
5716         case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5717         case GAUDI_EVENT_MME0_ACC_DERR:
5718         case GAUDI_EVENT_MME0_SBAB_DERR:
5719         case GAUDI_EVENT_MME1_ACC_DERR:
5720         case GAUDI_EVENT_MME1_SBAB_DERR:
5721         case GAUDI_EVENT_MME2_ACC_DERR:
5722         case GAUDI_EVENT_MME2_SBAB_DERR:
5723         case GAUDI_EVENT_MME3_ACC_DERR:
5724         case GAUDI_EVENT_MME3_SBAB_DERR:
5725         case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5726                 fallthrough;
5727         case GAUDI_EVENT_CPU_IF_ECC_DERR:
5728         case GAUDI_EVENT_PSOC_MEM_DERR:
5729         case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5730         case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5731         case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5732         case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5733         case GAUDI_EVENT_MMU_DERR:
5734                 gaudi_print_irq_info(hdev, event_type, true);
5735                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5736                 if (hdev->hard_reset_on_fw_events)
5737                         hl_device_reset(hdev, true, false);
5738                 break;
5739
5740         case GAUDI_EVENT_GIC500:
5741         case GAUDI_EVENT_AXI_ECC:
5742         case GAUDI_EVENT_L2_RAM_ECC:
5743         case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
5744                 gaudi_print_irq_info(hdev, event_type, false);
5745                 if (hdev->hard_reset_on_fw_events)
5746                         hl_device_reset(hdev, true, false);
5747                 break;
5748
5749         case GAUDI_EVENT_HBM0_SPI_0:
5750         case GAUDI_EVENT_HBM1_SPI_0:
5751         case GAUDI_EVENT_HBM2_SPI_0:
5752         case GAUDI_EVENT_HBM3_SPI_0:
5753                 gaudi_print_irq_info(hdev, event_type, false);
5754                 gaudi_hbm_read_interrupts(hdev,
5755                                           gaudi_hbm_event_to_dev(event_type));
5756                 if (hdev->hard_reset_on_fw_events)
5757                         hl_device_reset(hdev, true, false);
5758                 break;
5759
5760         case GAUDI_EVENT_HBM0_SPI_1:
5761         case GAUDI_EVENT_HBM1_SPI_1:
5762         case GAUDI_EVENT_HBM2_SPI_1:
5763         case GAUDI_EVENT_HBM3_SPI_1:
5764                 gaudi_print_irq_info(hdev, event_type, false);
5765                 gaudi_hbm_read_interrupts(hdev,
5766                                           gaudi_hbm_event_to_dev(event_type));
5767                 break;
5768
5769         case GAUDI_EVENT_TPC0_DEC:
5770         case GAUDI_EVENT_TPC1_DEC:
5771         case GAUDI_EVENT_TPC2_DEC:
5772         case GAUDI_EVENT_TPC3_DEC:
5773         case GAUDI_EVENT_TPC4_DEC:
5774         case GAUDI_EVENT_TPC5_DEC:
5775         case GAUDI_EVENT_TPC6_DEC:
5776         case GAUDI_EVENT_TPC7_DEC:
5777                 gaudi_print_irq_info(hdev, event_type, true);
5778                 reset_required = gaudi_tpc_read_interrupts(hdev,
5779                                         tpc_dec_event_to_tpc_id(event_type),
5780                                         "AXI_SLV_DEC_Error");
5781                 if (reset_required) {
5782                         dev_err(hdev->dev, "hard reset required due to %s\n",
5783                                 gaudi_irq_map_table[event_type].name);
5784
5785                         if (hdev->hard_reset_on_fw_events)
5786                                 hl_device_reset(hdev, true, false);
5787                 } else {
5788                         hl_fw_unmask_irq(hdev, event_type);
5789                 }
5790                 break;
5791
5792         case GAUDI_EVENT_TPC0_KRN_ERR:
5793         case GAUDI_EVENT_TPC1_KRN_ERR:
5794         case GAUDI_EVENT_TPC2_KRN_ERR:
5795         case GAUDI_EVENT_TPC3_KRN_ERR:
5796         case GAUDI_EVENT_TPC4_KRN_ERR:
5797         case GAUDI_EVENT_TPC5_KRN_ERR:
5798         case GAUDI_EVENT_TPC6_KRN_ERR:
5799         case GAUDI_EVENT_TPC7_KRN_ERR:
5800                 gaudi_print_irq_info(hdev, event_type, true);
5801                 reset_required = gaudi_tpc_read_interrupts(hdev,
5802                                         tpc_krn_event_to_tpc_id(event_type),
5803                                         "KRN_ERR");
5804                 if (reset_required) {
5805                         dev_err(hdev->dev, "hard reset required due to %s\n",
5806                                 gaudi_irq_map_table[event_type].name);
5807
5808                         if (hdev->hard_reset_on_fw_events)
5809                                 hl_device_reset(hdev, true, false);
5810                 } else {
5811                         hl_fw_unmask_irq(hdev, event_type);
5812                 }
5813                 break;
5814
5815         case GAUDI_EVENT_PCIE_CORE_SERR:
5816         case GAUDI_EVENT_PCIE_IF_SERR:
5817         case GAUDI_EVENT_PCIE_PHY_SERR:
5818         case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5819         case GAUDI_EVENT_MME0_ACC_SERR:
5820         case GAUDI_EVENT_MME0_SBAB_SERR:
5821         case GAUDI_EVENT_MME1_ACC_SERR:
5822         case GAUDI_EVENT_MME1_SBAB_SERR:
5823         case GAUDI_EVENT_MME2_ACC_SERR:
5824         case GAUDI_EVENT_MME2_SBAB_SERR:
5825         case GAUDI_EVENT_MME3_ACC_SERR:
5826         case GAUDI_EVENT_MME3_SBAB_SERR:
5827         case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5828         case GAUDI_EVENT_CPU_IF_ECC_SERR:
5829         case GAUDI_EVENT_PSOC_MEM_SERR:
5830         case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5831         case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5832         case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5833         case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5834                 fallthrough;
5835         case GAUDI_EVENT_MMU_SERR:
5836                 gaudi_print_irq_info(hdev, event_type, true);
5837                 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
5838                 hl_fw_unmask_irq(hdev, event_type);
5839                 break;
5840
5841         case GAUDI_EVENT_PCIE_DEC:
5842         case GAUDI_EVENT_MME0_WBC_RSP:
5843         case GAUDI_EVENT_MME0_SBAB0_RSP:
5844         case GAUDI_EVENT_MME1_WBC_RSP:
5845         case GAUDI_EVENT_MME1_SBAB0_RSP:
5846         case GAUDI_EVENT_MME2_WBC_RSP:
5847         case GAUDI_EVENT_MME2_SBAB0_RSP:
5848         case GAUDI_EVENT_MME3_WBC_RSP:
5849         case GAUDI_EVENT_MME3_SBAB0_RSP:
5850         case GAUDI_EVENT_CPU_AXI_SPLITTER:
5851         case GAUDI_EVENT_PSOC_AXI_DEC:
5852         case GAUDI_EVENT_PSOC_PRSTN_FALL:
5853         case GAUDI_EVENT_MMU_PAGE_FAULT:
5854         case GAUDI_EVENT_MMU_WR_PERM:
5855         case GAUDI_EVENT_RAZWI_OR_ADC:
5856         case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5857         case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5858         case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5859                 fallthrough;
5860         case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
5861                 gaudi_print_irq_info(hdev, event_type, true);
5862                 gaudi_handle_qman_err(hdev, event_type);
5863                 hl_fw_unmask_irq(hdev, event_type);
5864                 break;
5865
5866         case GAUDI_EVENT_RAZWI_OR_ADC_SW:
5867                 gaudi_print_irq_info(hdev, event_type, true);
5868                 if (hdev->hard_reset_on_fw_events)
5869                         hl_device_reset(hdev, true, false);
5870                 break;
5871
5872         case GAUDI_EVENT_TPC0_BMON_SPMU:
5873         case GAUDI_EVENT_TPC1_BMON_SPMU:
5874         case GAUDI_EVENT_TPC2_BMON_SPMU:
5875         case GAUDI_EVENT_TPC3_BMON_SPMU:
5876         case GAUDI_EVENT_TPC4_BMON_SPMU:
5877         case GAUDI_EVENT_TPC5_BMON_SPMU:
5878         case GAUDI_EVENT_TPC6_BMON_SPMU:
5879         case GAUDI_EVENT_TPC7_BMON_SPMU:
5880         case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
5881                 gaudi_print_irq_info(hdev, event_type, false);
5882                 hl_fw_unmask_irq(hdev, event_type);
5883                 break;
5884
5885         case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
5886                 gaudi_print_clk_change_info(hdev, event_type);
5887                 hl_fw_unmask_irq(hdev, event_type);
5888                 break;
5889
5890         case GAUDI_EVENT_PSOC_GPIO_U16_0:
5891                 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
5892                 dev_err(hdev->dev,
5893                         "Received high temp H/W interrupt %d (cause %d)\n",
5894                         event_type, cause);
5895                 break;
5896
5897         default:
5898                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
5899                                 event_type);
5900                 break;
5901         }
5902 }
5903
5904 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
5905                                         u32 *size)
5906 {
5907         struct gaudi_device *gaudi = hdev->asic_specific;
5908
5909         if (aggregate) {
5910                 *size = (u32) sizeof(gaudi->events_stat_aggregate);
5911                 return gaudi->events_stat_aggregate;
5912         }
5913
5914         *size = (u32) sizeof(gaudi->events_stat);
5915         return gaudi->events_stat;
5916 }
5917
5918 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
5919                                         u32 flags)
5920 {
5921         struct gaudi_device *gaudi = hdev->asic_specific;
5922         u32 status, timeout_usec;
5923         int rc;
5924
5925         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5926                 hdev->hard_reset_pending)
5927                 return 0;
5928
5929         if (hdev->pldm)
5930                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5931         else
5932                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5933
5934         mutex_lock(&hdev->mmu_cache_lock);
5935
5936         /* L0 & L1 invalidation */
5937         WREG32(mmSTLB_INV_PS, 3);
5938         WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
5939         WREG32(mmSTLB_INV_PS, 2);
5940
5941         rc = hl_poll_timeout(
5942                 hdev,
5943                 mmSTLB_INV_PS,
5944                 status,
5945                 !status,
5946                 1000,
5947                 timeout_usec);
5948
5949         WREG32(mmSTLB_INV_SET, 0);
5950
5951         mutex_unlock(&hdev->mmu_cache_lock);
5952
5953         if (rc) {
5954                 dev_err_ratelimited(hdev->dev,
5955                                         "MMU cache invalidation timeout\n");
5956                 hl_device_reset(hdev, true, false);
5957         }
5958
5959         return rc;
5960 }
5961
5962 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
5963                                 bool is_hard, u32 asid, u64 va, u64 size)
5964 {
5965         struct gaudi_device *gaudi = hdev->asic_specific;
5966         u32 status, timeout_usec;
5967         u32 inv_data;
5968         u32 pi;
5969         int rc;
5970
5971         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5972                 hdev->hard_reset_pending)
5973                 return 0;
5974
5975         mutex_lock(&hdev->mmu_cache_lock);
5976
5977         if (hdev->pldm)
5978                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5979         else
5980                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5981
5982         /*
5983          * TODO: currently we invalidate the entire L0 & L1, as in a regular
5984          * hard invalidation. This should be narrowed to invalidate only the
5985          * cache lines that match the given ASID, VA and size.
5986          * Note that L1 will be flushed entirely in any case.
5987          */
5988
5989         /* L0 & L1 invalidation */
5990         inv_data = RREG32(mmSTLB_CACHE_INV);
5991         /* PI is 8 bit */
5992         pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
5993         WREG32(mmSTLB_CACHE_INV,
5994                 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
5995
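        /* Wait for the MMU consumer index to catch up with the producer index
         * we have just published.
         */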
5996         rc = hl_poll_timeout(
5997                 hdev,
5998                 mmSTLB_INV_CONSUMER_INDEX,
5999                 status,
6000                 status == pi,
6001                 1000,
6002                 timeout_usec);
6003
6004         mutex_unlock(&hdev->mmu_cache_lock);
6005
6006         if (rc) {
6007                 dev_err_ratelimited(hdev->dev,
6008                                         "MMU cache invalidation timeout\n");
6009                 hl_device_reset(hdev, true, false);
6010         }
6011
6012         return rc;
6013 }
6014
6015 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
6016                                         u32 asid, u64 phys_addr)
6017 {
6018         u32 status, timeout_usec;
6019         int rc;
6020
6021         if (hdev->pldm)
6022                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6023         else
6024                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6025
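        /* Publish the new hop0 address for this ASID and kick the MMU; bit 31
         * of MMU_BUSY presumably reads back as set until the update completes.
         */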
6026         WREG32(MMU_ASID, asid);
6027         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
6028         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
6029         WREG32(MMU_BUSY, 0x80000000);
6030
6031         rc = hl_poll_timeout(
6032                 hdev,
6033                 MMU_BUSY,
6034                 status,
6035                 !(status & 0x80000000),
6036                 1000,
6037                 timeout_usec);
6038
6039         if (rc) {
6040                 dev_err(hdev->dev,
6041                         "Timeout during MMU hop0 config of asid %d\n", asid);
6042                 return rc;
6043         }
6044
6045         return 0;
6046 }
6047
6048 static int gaudi_send_heartbeat(struct hl_device *hdev)
6049 {
6050         struct gaudi_device *gaudi = hdev->asic_specific;
6051
6052         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6053                 return 0;
6054
6055         return hl_fw_send_heartbeat(hdev);
6056 }
6057
6058 static int gaudi_cpucp_info_get(struct hl_device *hdev)
6059 {
6060         struct gaudi_device *gaudi = hdev->asic_specific;
6061         struct asic_fixed_properties *prop = &hdev->asic_prop;
6062         int rc;
6063
6064         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6065                 return 0;
6066
6067         rc = hl_fw_cpucp_info_get(hdev);
6068         if (rc)
6069                 return rc;
6070
6071         if (!strlen(prop->cpucp_info.card_name))
6072                 strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
6073                                 CARD_NAME_MAX_LEN);
6074
6075         hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
6076
6077         if (hdev->card_type == cpucp_card_type_pci)
6078                 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
6079         else if (hdev->card_type == cpucp_card_type_pmc)
6080                 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
6081
6082         hdev->max_power = prop->max_power_default;
6083
6084         return 0;
6085 }
6086
6087 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
6088                                         struct seq_file *s)
6089 {
6090         struct gaudi_device *gaudi = hdev->asic_specific;
6091         const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
6092         const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
6093         u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
6094         bool is_idle = true, is_eng_idle, is_slave;
6095         u64 offset;
6096         int i, dma_id;
6097
6098         mutex_lock(&gaudi->clk_gate_mutex);
6099
6100         hdev->asic_funcs->disable_clock_gating(hdev);
6101
6102         if (s)
6103                 seq_puts(s,
6104                         "\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
6105                         "---  -------  ------------  ----------  -------------\n");
6106
6107         for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
6108                 dma_id = gaudi_dma_assignment[i];
6109                 offset = dma_id * DMA_QMAN_OFFSET;
6110
6111                 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
6112                 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
6113                 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
6114                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6115                                 IS_DMA_IDLE(dma_core_sts0);
6116                 is_idle &= is_eng_idle;
6117
6118                 if (mask)
6119                         *mask |= ((u64) !is_eng_idle) <<
6120                                         (GAUDI_ENGINE_ID_DMA_0 + dma_id);
6121                 if (s)
6122                         seq_printf(s, fmt, dma_id,
6123                                 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
6124                                 qm_cgm_sts, dma_core_sts0);
6125         }
6126
6127         if (s)
6128                 seq_puts(s,
6129                         "\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
6130                         "---  -------  ------------  ----------  ----------\n");
6131
6132         for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6133                 offset = i * TPC_QMAN_OFFSET;
6134                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
6135                 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
6136                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
6137                 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6138                                 IS_TPC_IDLE(tpc_cfg_sts);
6139                 is_idle &= is_eng_idle;
6140
6141                 if (mask)
6142                         *mask |= ((u64) !is_eng_idle) <<
6143                                                 (GAUDI_ENGINE_ID_TPC_0 + i);
6144                 if (s)
6145                         seq_printf(s, fmt, i,
6146                                 is_eng_idle ? "Y" : "N",
6147                                 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6148         }
6149
6150         if (s)
6151                 seq_puts(s,
6152                         "\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
6153                         "---  -------  ------------  ----------  -----------\n");
6154
6155         for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
6156                 offset = i * MME_QMAN_OFFSET;
6157                 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
6158                 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
6159
6160                 /* MME 1 & 3 are slaves, no need to check their QMANs */
6161                 is_slave = i % 2;
6162                 if (!is_slave) {
6163                         qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
6164                         qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
6165                         is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
6166                 }
6167
6168                 is_idle &= is_eng_idle;
6169
6170                 if (mask)
6171                         *mask |= ((u64) !is_eng_idle) <<
6172                                                 (GAUDI_ENGINE_ID_MME_0 + i);
6173                 if (s) {
6174                         if (!is_slave)
6175                                 seq_printf(s, fmt, i,
6176                                         is_eng_idle ? "Y" : "N",
6177                                         qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
6178                         else
6179                                 seq_printf(s, mme_slave_fmt, i,
6180                                         is_eng_idle ? "Y" : "N", "-",
6181                                         "-", mme_arch_sts);
6182                 }
6183         }
6184
6185         if (s)
6186                 seq_puts(s, "\n");
6187
6188         hdev->asic_funcs->set_clock_gating(hdev);
6189
6190         mutex_unlock(&gaudi->clk_gate_mutex);
6191
6192         return is_idle;
6193 }
6194
6195 static void gaudi_hw_queues_lock(struct hl_device *hdev)
6196         __acquires(&gaudi->hw_queues_lock)
6197 {
6198         struct gaudi_device *gaudi = hdev->asic_specific;
6199
6200         spin_lock(&gaudi->hw_queues_lock);
6201 }
6202
6203 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
6204         __releases(&gaudi->hw_queues_lock)
6205 {
6206         struct gaudi_device *gaudi = hdev->asic_specific;
6207
6208         spin_unlock(&gaudi->hw_queues_lock);
6209 }
6210
6211 static u32 gaudi_get_pci_id(struct hl_device *hdev)
6212 {
6213         return hdev->pdev->device;
6214 }
6215
6216 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
6217                                 size_t max_size)
6218 {
6219         struct gaudi_device *gaudi = hdev->asic_specific;
6220
6221         if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6222                 return 0;
6223
6224         return hl_fw_get_eeprom_data(hdev, data, max_size);
6225 }
6226
6227 /*
6228  * this function should be used only during initialization and/or after reset,
6229  * when there are no active users.
6230  */
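/*
 * Sequence implemented below: program the kernel and icache base addresses,
 * issue an icache invalidate + 64KB prefetch and wait for the vector pipe to
 * drain, then trigger TPC_EXECUTE and wait again, and finally wait for the
 * work-queue inflight counter to drop to zero.
 */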
6231 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
6232                                 u32 tpc_id)
6233 {
6234         struct gaudi_device *gaudi = hdev->asic_specific;
6235         u64 kernel_timeout;
6236         u32 status, offset;
6237         int rc;
6238
6239         offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
6240
6241         if (hdev->pldm)
6242                 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
6243         else
6244                 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
6245
6246         mutex_lock(&gaudi->clk_gate_mutex);
6247
6248         hdev->asic_funcs->disable_clock_gating(hdev);
6249
6250         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
6251                         lower_32_bits(tpc_kernel));
6252         WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
6253                         upper_32_bits(tpc_kernel));
6254
6255         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
6256                         lower_32_bits(tpc_kernel));
6257         WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
6258                         upper_32_bits(tpc_kernel));
6259         /* set a valid LUT pointer, content is of no significance */
6260         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
6261                         lower_32_bits(tpc_kernel));
6262         WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
6263                         upper_32_bits(tpc_kernel));
6264
6265         WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
6266                         lower_32_bits(CFG_BASE +
6267                                 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
6268
6269         WREG32(mmTPC0_CFG_TPC_CMD + offset,
6270                         (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
6271                         1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
6272         /* wait a bit for the engine to start executing */
6273         usleep_range(1000, 1500);
6274
6275         /* wait until engine has finished executing */
6276         rc = hl_poll_timeout(
6277                 hdev,
6278                 mmTPC0_CFG_STATUS + offset,
6279                 status,
6280                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6281                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6282                 1000,
6283                 kernel_timeout);
6284
6285         if (rc) {
6286                 dev_err(hdev->dev,
6287                         "Timeout while waiting for TPC%d icache prefetch\n",
6288                         tpc_id);
6289                 hdev->asic_funcs->set_clock_gating(hdev);
6290                 mutex_unlock(&gaudi->clk_gate_mutex);
6291                 return -EIO;
6292         }
6293
6294         WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
6295                         1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
6296
6297         /* wait a bit for the engine to start executing */
6298         usleep_range(1000, 1500);
6299
6300         /* wait until engine has finished executing */
6301         rc = hl_poll_timeout(
6302                 hdev,
6303                 mmTPC0_CFG_STATUS + offset,
6304                 status,
6305                 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6306                                 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6307                 1000,
6308                 kernel_timeout);
6309
6310         if (rc) {
6311                 dev_err(hdev->dev,
6312                         "Timeout while waiting for TPC%d vector pipe\n",
6313                         tpc_id);
6314                 hdev->asic_funcs->set_clock_gating(hdev);
6315                 mutex_unlock(&gaudi->clk_gate_mutex);
6316                 return -EIO;
6317         }
6318
6319         rc = hl_poll_timeout(
6320                 hdev,
6321                 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
6322                 status,
6323                 (status == 0),
6324                 1000,
6325                 kernel_timeout);
6326
6327         hdev->asic_funcs->set_clock_gating(hdev);
6328         mutex_unlock(&gaudi->clk_gate_mutex);
6329
6330         if (rc) {
6331                 dev_err(hdev->dev,
6332                         "Timeout while waiting for TPC%d kernel to execute\n",
6333                         tpc_id);
6334                 return -EIO;
6335         }
6336
6337         return 0;
6338 }
6339
6340 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
6341 {
6342         return RREG32(mmHW_STATE);
6343 }
6344
6345 static int gaudi_ctx_init(struct hl_ctx *ctx)
6346 {
6347         return 0;
6348 }
6349
6350 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
6351 {
6352         return gaudi_cq_assignment[cq_idx];
6353 }
6354
6355 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
6356 {
6357         return sizeof(struct packet_msg_short) +
6358                         sizeof(struct packet_msg_prot) * 2;
6359 }
6360
6361 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
6362 {
6363         return sizeof(struct packet_msg_short) * 4 +
6364                         sizeof(struct packet_fence) +
6365                         sizeof(struct packet_msg_prot) * 2;
6366 }
6367
6368 static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
6369 {
6370         struct hl_cb *cb = (struct hl_cb *) data;
6371         struct packet_msg_short *pkt;
6372         u32 value, ctl;
6373
6374         pkt = (struct packet_msg_short *) (uintptr_t) cb->kernel_address;
6375         memset(pkt, 0, sizeof(*pkt));
6376
6377         /* Inc by 1, Mode ADD */
6378         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
6379         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
6380
6381         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
6382         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6383         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
6384         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6385         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 1);
6386         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6387         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6388
6389         pkt->value = cpu_to_le32(value);
6390         pkt->ctl = cpu_to_le32(ctl);
6391 }
6392
6393 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
6394                                         u16 addr)
6395 {
6396         u32 ctl, pkt_size = sizeof(*pkt);
6397
6398         memset(pkt, 0, pkt_size);
6399
6400         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6401         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
6402         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6403         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6404         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6405         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */
6406
6407         pkt->value = cpu_to_le32(value);
6408         pkt->ctl = cpu_to_le32(ctl);
6409
6410         return pkt_size;
6411 }
6412
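/*
 * Arm a monitor on a sync object: sync objects are grouped eight per group,
 * so the group id is sob_id / 8 and the mask presumably excludes every SOB
 * in the group except the monitored one; mode 0 means "greater or equal" to
 * sob_val (see the comment below).
 */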
6413 static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
6414                                         u16 sob_val, u16 addr)
6415 {
6416         u32 ctl, value, pkt_size = sizeof(*pkt);
6417         u8 mask = ~(1 << (sob_id & 0x7));
6418
6419         memset(pkt, 0, pkt_size);
6420
6421         value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_id / 8);
6422         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
6423         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
6424                         0); /* GREATER OR EQUAL */
6425         value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
6426
6427         ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
6428         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
6429         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
6430         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
6431         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6432         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6433         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6434
6435         pkt->value = cpu_to_le32(value);
6436         pkt->ctl = cpu_to_le32(ctl);
6437
6438         return pkt_size;
6439 }
6440
6441 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
6442 {
6443         u32 ctl, cfg, pkt_size = sizeof(*pkt);
6444
6445         memset(pkt, 0, pkt_size);
6446
6447         cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
6448         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
6449         cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
6450
6451         ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
6452         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
6453         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
6454         ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
6455
6456         pkt->cfg = cpu_to_le32(cfg);
6457         pkt->ctl = cpu_to_le32(ctl);
6458
6459         return pkt_size;
6460 }
6461
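/*
 * Build a wait CB: four MSG_SHORT packets configure a sync manager monitor
 * (payload address low/high, payload data and ARM), followed by a FENCE
 * packet that blocks the queue until the monitor writes the payload. The two
 * MSG_PROT packets accounted for in gaudi_get_wait_cb_size() are presumably
 * appended later by the common submission flow.
 */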
6462 static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
6463                         u16 sob_val, u16 mon_id, u32 q_idx)
6464 {
6465         struct hl_cb *cb = (struct hl_cb *) data;
6466         void *buf = (void *) (uintptr_t) cb->kernel_address;
6467         u64 monitor_base, fence_addr = 0;
6468         u32 size = 0;
6469         u16 msg_addr_offset;
6470
6471         switch (q_idx) {
6472         case GAUDI_QUEUE_ID_DMA_0_0:
6473                 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
6474                 break;
6475         case GAUDI_QUEUE_ID_DMA_0_1:
6476                 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
6477                 break;
6478         case GAUDI_QUEUE_ID_DMA_0_2:
6479                 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
6480                 break;
6481         case GAUDI_QUEUE_ID_DMA_0_3:
6482                 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
6483                 break;
6484         case GAUDI_QUEUE_ID_DMA_1_0:
6485                 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
6486                 break;
6487         case GAUDI_QUEUE_ID_DMA_1_1:
6488                 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
6489                 break;
6490         case GAUDI_QUEUE_ID_DMA_1_2:
6491                 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
6492                 break;
6493         case GAUDI_QUEUE_ID_DMA_1_3:
6494                 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
6495                 break;
6496         case GAUDI_QUEUE_ID_DMA_5_0:
6497                 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
6498                 break;
6499         case GAUDI_QUEUE_ID_DMA_5_1:
6500                 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
6501                 break;
6502         case GAUDI_QUEUE_ID_DMA_5_2:
6503                 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
6504                 break;
6505         case GAUDI_QUEUE_ID_DMA_5_3:
6506                 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
6507                 break;
6508         default:
6509                 /* queue index should be valid here */
6510                 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
6511                                 q_idx);
6512                 return;
6513         }
6514
6515         fence_addr += CFG_BASE;
6516
	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	/* Fourth monitor config packet: bind the monitor to a sync object */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;
	size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
						msg_addr_offset);

	/* Fence packet */
	size += gaudi_add_fence_pkt(buf + size);
}

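/*
 * Clear the SOB value in the sync manager and re-initialize its reference
 * count so the sync object can be reused.
 */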
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
		hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
		0);

	kref_init(&hw_sob->kref);
}

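/*
 * The boot firmware marks a POWER9 host by writing HL_POWER9_HOST_MAGIC to a
 * scratchpad register that survives reset; in that case a full 64-bit DMA
 * mask is used, otherwise the mask is limited to 48 bits.
 */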
static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
{
	if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
							HL_POWER9_HOST_MAGIC) {
		hdev->power9_64bit_dma_enable = 1;
		hdev->dma_mask = 64;
	} else {
		hdev->power9_64bit_dma_enable = 0;
		hdev->dma_mask = 48;
	}
}

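/*
 * Compose a 64-bit device timestamp from the upper and lower 32-bit halves of
 * the PSOC timestamp counter.
 */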
static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

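/* ASIC-specific dispatch table used by the common habanalabs driver code */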
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.cb_mmap = gaudi_cb_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = gaudi_dma_unmap_sg,
	.cs_parser = gaudi_cs_parser,
	.asic_dma_map_sg = gaudi_dma_map_sg,
	.get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read32 = gaudi_debugfs_read32,
	.debugfs_write32 = gaudi_debugfs_write32,
	.debugfs_read64 = gaudi_debugfs_read64,
	.debugfs_write64 = gaudi_debugfs_write64,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.set_pll_profile = gaudi_set_pll_profile,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.send_heartbeat = gaudi_send_heartbeat,
	.set_clock_gating = gaudi_set_clock_gating,
	.disable_clock_gating = gaudi_disable_clock_gating,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.soft_reset_late_init = gaudi_soft_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.send_cpu_message = gaudi_send_cpu_message,
	.get_hw_state = gaudi_get_hw_state,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.get_clk_rate = gaudi_get_clk_rate,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.read_device_fw_version = gaudi_read_device_fw_version,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
	.get_device_time = gaudi_get_device_time
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}