habanalabs: remove power9 workaround for dma support
linux-2.6-microblaze.git: drivers/misc/habanalabs/goya/goya.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "goyaP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_0.h"
11 #include "../include/goya/asic_reg/goya_masks.h"
12 #include "../include/goya/goya_reg_map.h"
13
14 #include <linux/pci.h>
15 #include <linux/hwmon.h>
16 #include <linux/iommu.h>
17 #include <linux/seq_file.h>
18
19 /*
20  * GOYA security scheme:
21  *
22  * 1. Host is protected by:
23  *        - Range registers (When MMU is enabled, DMA RR does NOT protect host)
24  *        - MMU
25  *
26  * 2. DRAM is protected by:
27  *        - Range registers (protect the first 512MB)
28  *        - MMU (isolation between users)
29  *
30  * 3. Configuration is protected by:
31  *        - Range registers
32  *        - Protection bits
33  *
34  * When MMU is disabled:
35  *
36  * QMAN DMA: PQ, CQ, CP, DMA are secured.
37  * PQ, CB and the data are on the host.
38  *
39  * QMAN TPC/MME:
40  * PQ, CQ and CP are not secured.
41  * PQ, CB and the data are on the SRAM/DRAM.
42  *
43  * Since QMAN DMA is secured, the driver parses the DMA CB:
44  *     - checks the DMA pointer
45  *     - WREG, MSG_PROT are not allowed.
46  *     - MSG_LONG/SHORT are allowed.
47  *
48  * A read/write transaction by the QMAN to a protected area will succeed if
49  * and only if the QMAN's CP is secured and MSG_PROT is used.
50  *
51  *
52  * When MMU is enabled:
53  *
54  * QMAN DMA: PQ, CQ and CP are secured.
55  * MMU is set to bypass on the Secure props register of the QMAN.
56  * The reasons we don't enable MMU for PQ, CQ and CP are:
57  *     - PQ entry is in kernel address space and the driver doesn't map it.
58  *     - CP writes to MSIX register and to kernel address space (completion
59  *       queue).
60  *
61  * DMA is not secured, but because CP is secured, the driver still needs to
62  * parse the CB. However, it doesn't need to check the DMA addresses.
63  *
64  * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA and
65  * the driver doesn't map memory in MMU.
66  *
67  * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode)
68  *
69  * DMA RR does NOT protect host because DMA is not secured
70  *
71  */
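
/*
 * Illustrative sketch of the parsing rules above (hypothetical helper and
 * names, not the driver's actual CB parser): a walk over a secured QMAN DMA
 * CB would reject privileged packets and pass messaging packets through:
 *
 *	switch (pkt_id) {
 *	case PACKET_WREG_32:
 *	case PACKET_MSG_PROT:
 *		return -EPERM;			/* forbidden in user DMA CBs */
 *	case PACKET_MSG_LONG:
 *	case PACKET_MSG_SHORT:
 *		return 0;			/* allowed */
 *	case PACKET_LIN_DMA:
 *		return validate_dma_ptr();	/* hypothetical pointer check */
 *	default:
 *		return -EINVAL;
 *	}
 */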
72
73 #define GOYA_BOOT_FIT_FILE      "habanalabs/goya/goya-boot-fit.itb"
74 #define GOYA_LINUX_FW_FILE      "habanalabs/goya/goya-fit.itb"
75
76 #define GOYA_MMU_REGS_NUM               63
77
78 #define GOYA_DMA_POOL_BLK_SIZE          0x100           /* 256 bytes */
79
80 #define GOYA_RESET_TIMEOUT_MSEC         500             /* 500ms */
81 #define GOYA_PLDM_RESET_TIMEOUT_MSEC    20000           /* 20s */
82 #define GOYA_RESET_WAIT_MSEC            1               /* 1ms */
83 #define GOYA_CPU_RESET_WAIT_MSEC        100             /* 100ms */
84 #define GOYA_PLDM_RESET_WAIT_MSEC       1000            /* 1s */
85 #define GOYA_TEST_QUEUE_WAIT_USEC       100000          /* 100ms */
86 #define GOYA_PLDM_MMU_TIMEOUT_USEC      (MMU_CONFIG_TIMEOUT_USEC * 100)
87 #define GOYA_PLDM_QMAN0_TIMEOUT_USEC    (HL_DEVICE_TIMEOUT_USEC * 30)
88 #define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC  1000000         /* 1s */
89 #define GOYA_MSG_TO_CPU_TIMEOUT_USEC    4000000         /* 4s */
90 #define GOYA_WAIT_FOR_BL_TIMEOUT_USEC   15000000        /* 15s */
91
92 #define GOYA_QMAN0_FENCE_VAL            0xD169B243
93
94 #define GOYA_MAX_STRING_LEN             20
95
96 #define GOYA_CB_POOL_CB_CNT             512
97 #define GOYA_CB_POOL_CB_SIZE            0x20000         /* 128KB */
98
99 #define IS_QM_IDLE(engine, qm_glbl_sts0) \
100         (((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
101 #define IS_DMA_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(DMA, qm_glbl_sts0)
102 #define IS_TPC_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(TPC, qm_glbl_sts0)
103 #define IS_MME_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(MME, qm_glbl_sts0)
104
105 #define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
106         (((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
107                         engine##_CMDQ_IDLE_MASK)
108 #define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
109         IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
110 #define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
111         IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)
112
113 #define IS_DMA_IDLE(dma_core_sts0) \
114         !((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)
115
116 #define IS_TPC_IDLE(tpc_cfg_sts) \
117         (((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)
118
119 #define IS_MME_IDLE(mme_arch_sts) \
120         (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
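
/*
 * Usage sketch: the ASIC idle check reads each engine's status registers and
 * combines the relevant macros above, conceptually (per-engine offsets and
 * variable names are illustrative):
 *
 *	qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + offset);
 *	dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + offset);
 *	is_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
 *			IS_DMA_IDLE(dma_core_sts0);
 */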
121
122 static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
123                 "goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
124                 "goya cq 4", "goya cpu eq"
125 };
126
127 static u16 goya_packet_sizes[MAX_PACKET_ID] = {
128         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
129         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
130         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
131         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
132         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
133         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
134         [PACKET_FENCE]          = sizeof(struct packet_fence),
135         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
136         [PACKET_NOP]            = sizeof(struct packet_nop),
137         [PACKET_STOP]           = sizeof(struct packet_stop)
138 };
139
140 static inline bool validate_packet_id(enum packet_id id)
141 {
142         switch (id) {
143         case PACKET_WREG_32:
144         case PACKET_WREG_BULK:
145         case PACKET_MSG_LONG:
146         case PACKET_MSG_SHORT:
147         case PACKET_CP_DMA:
148         case PACKET_MSG_PROT:
149         case PACKET_FENCE:
150         case PACKET_LIN_DMA:
151         case PACKET_NOP:
152         case PACKET_STOP:
153                 return true;
154         default:
155                 return false;
156         }
157 }
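
/*
 * Together with goya_packet_sizes[], this lets a CB parser walk user packets
 * safely. Minimal sketch (variable names are illustrative; PACKET_WREG_BULK
 * is variable-length and needs dedicated handling):
 *
 *	if (!validate_packet_id(pkt_id))
 *		return -EINVAL;
 *	pkt_size = goya_packet_sizes[pkt_id];
 *	if (cb_parsed_length + pkt_size > cb_size)
 *		return -EINVAL;
 *	cb_parsed_length += pkt_size;
 */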
158
159 static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
160         mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
161         mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
162         mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
163         mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
164         mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
165         mmTPC0_QM_GLBL_SECURE_PROPS,
166         mmTPC0_QM_GLBL_NON_SECURE_PROPS,
167         mmTPC0_CMDQ_GLBL_SECURE_PROPS,
168         mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
169         mmTPC0_CFG_ARUSER,
170         mmTPC0_CFG_AWUSER,
171         mmTPC1_QM_GLBL_SECURE_PROPS,
172         mmTPC1_QM_GLBL_NON_SECURE_PROPS,
173         mmTPC1_CMDQ_GLBL_SECURE_PROPS,
174         mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
175         mmTPC1_CFG_ARUSER,
176         mmTPC1_CFG_AWUSER,
177         mmTPC2_QM_GLBL_SECURE_PROPS,
178         mmTPC2_QM_GLBL_NON_SECURE_PROPS,
179         mmTPC2_CMDQ_GLBL_SECURE_PROPS,
180         mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
181         mmTPC2_CFG_ARUSER,
182         mmTPC2_CFG_AWUSER,
183         mmTPC3_QM_GLBL_SECURE_PROPS,
184         mmTPC3_QM_GLBL_NON_SECURE_PROPS,
185         mmTPC3_CMDQ_GLBL_SECURE_PROPS,
186         mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
187         mmTPC3_CFG_ARUSER,
188         mmTPC3_CFG_AWUSER,
189         mmTPC4_QM_GLBL_SECURE_PROPS,
190         mmTPC4_QM_GLBL_NON_SECURE_PROPS,
191         mmTPC4_CMDQ_GLBL_SECURE_PROPS,
192         mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
193         mmTPC4_CFG_ARUSER,
194         mmTPC4_CFG_AWUSER,
195         mmTPC5_QM_GLBL_SECURE_PROPS,
196         mmTPC5_QM_GLBL_NON_SECURE_PROPS,
197         mmTPC5_CMDQ_GLBL_SECURE_PROPS,
198         mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
199         mmTPC5_CFG_ARUSER,
200         mmTPC5_CFG_AWUSER,
201         mmTPC6_QM_GLBL_SECURE_PROPS,
202         mmTPC6_QM_GLBL_NON_SECURE_PROPS,
203         mmTPC6_CMDQ_GLBL_SECURE_PROPS,
204         mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
205         mmTPC6_CFG_ARUSER,
206         mmTPC6_CFG_AWUSER,
207         mmTPC7_QM_GLBL_SECURE_PROPS,
208         mmTPC7_QM_GLBL_NON_SECURE_PROPS,
209         mmTPC7_CMDQ_GLBL_SECURE_PROPS,
210         mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
211         mmTPC7_CFG_ARUSER,
212         mmTPC7_CFG_AWUSER,
213         mmMME_QM_GLBL_SECURE_PROPS,
214         mmMME_QM_GLBL_NON_SECURE_PROPS,
215         mmMME_CMDQ_GLBL_SECURE_PROPS,
216         mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
217         mmMME_SBA_CONTROL_DATA,
218         mmMME_SBB_CONTROL_DATA,
219         mmMME_SBC_CONTROL_DATA,
220         mmMME_WBC_CONTROL_DATA,
221         mmPCIE_WRAP_PSOC_ARUSER,
222         mmPCIE_WRAP_PSOC_AWUSER
223 };
224
225 static u32 goya_all_events[] = {
226         GOYA_ASYNC_EVENT_ID_PCIE_IF,
227         GOYA_ASYNC_EVENT_ID_TPC0_ECC,
228         GOYA_ASYNC_EVENT_ID_TPC1_ECC,
229         GOYA_ASYNC_EVENT_ID_TPC2_ECC,
230         GOYA_ASYNC_EVENT_ID_TPC3_ECC,
231         GOYA_ASYNC_EVENT_ID_TPC4_ECC,
232         GOYA_ASYNC_EVENT_ID_TPC5_ECC,
233         GOYA_ASYNC_EVENT_ID_TPC6_ECC,
234         GOYA_ASYNC_EVENT_ID_TPC7_ECC,
235         GOYA_ASYNC_EVENT_ID_MME_ECC,
236         GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
237         GOYA_ASYNC_EVENT_ID_MMU_ECC,
238         GOYA_ASYNC_EVENT_ID_DMA_MACRO,
239         GOYA_ASYNC_EVENT_ID_DMA_ECC,
240         GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
241         GOYA_ASYNC_EVENT_ID_PSOC_MEM,
242         GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
243         GOYA_ASYNC_EVENT_ID_SRAM0,
244         GOYA_ASYNC_EVENT_ID_SRAM1,
245         GOYA_ASYNC_EVENT_ID_SRAM2,
246         GOYA_ASYNC_EVENT_ID_SRAM3,
247         GOYA_ASYNC_EVENT_ID_SRAM4,
248         GOYA_ASYNC_EVENT_ID_SRAM5,
249         GOYA_ASYNC_EVENT_ID_SRAM6,
250         GOYA_ASYNC_EVENT_ID_SRAM7,
251         GOYA_ASYNC_EVENT_ID_SRAM8,
252         GOYA_ASYNC_EVENT_ID_SRAM9,
253         GOYA_ASYNC_EVENT_ID_SRAM10,
254         GOYA_ASYNC_EVENT_ID_SRAM11,
255         GOYA_ASYNC_EVENT_ID_SRAM12,
256         GOYA_ASYNC_EVENT_ID_SRAM13,
257         GOYA_ASYNC_EVENT_ID_SRAM14,
258         GOYA_ASYNC_EVENT_ID_SRAM15,
259         GOYA_ASYNC_EVENT_ID_SRAM16,
260         GOYA_ASYNC_EVENT_ID_SRAM17,
261         GOYA_ASYNC_EVENT_ID_SRAM18,
262         GOYA_ASYNC_EVENT_ID_SRAM19,
263         GOYA_ASYNC_EVENT_ID_SRAM20,
264         GOYA_ASYNC_EVENT_ID_SRAM21,
265         GOYA_ASYNC_EVENT_ID_SRAM22,
266         GOYA_ASYNC_EVENT_ID_SRAM23,
267         GOYA_ASYNC_EVENT_ID_SRAM24,
268         GOYA_ASYNC_EVENT_ID_SRAM25,
269         GOYA_ASYNC_EVENT_ID_SRAM26,
270         GOYA_ASYNC_EVENT_ID_SRAM27,
271         GOYA_ASYNC_EVENT_ID_SRAM28,
272         GOYA_ASYNC_EVENT_ID_SRAM29,
273         GOYA_ASYNC_EVENT_ID_GIC500,
274         GOYA_ASYNC_EVENT_ID_PLL0,
275         GOYA_ASYNC_EVENT_ID_PLL1,
276         GOYA_ASYNC_EVENT_ID_PLL3,
277         GOYA_ASYNC_EVENT_ID_PLL4,
278         GOYA_ASYNC_EVENT_ID_PLL5,
279         GOYA_ASYNC_EVENT_ID_PLL6,
280         GOYA_ASYNC_EVENT_ID_AXI_ECC,
281         GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
282         GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
283         GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
284         GOYA_ASYNC_EVENT_ID_PCIE_DEC,
285         GOYA_ASYNC_EVENT_ID_TPC0_DEC,
286         GOYA_ASYNC_EVENT_ID_TPC1_DEC,
287         GOYA_ASYNC_EVENT_ID_TPC2_DEC,
288         GOYA_ASYNC_EVENT_ID_TPC3_DEC,
289         GOYA_ASYNC_EVENT_ID_TPC4_DEC,
290         GOYA_ASYNC_EVENT_ID_TPC5_DEC,
291         GOYA_ASYNC_EVENT_ID_TPC6_DEC,
292         GOYA_ASYNC_EVENT_ID_TPC7_DEC,
293         GOYA_ASYNC_EVENT_ID_MME_WACS,
294         GOYA_ASYNC_EVENT_ID_MME_WACSD,
295         GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
296         GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
297         GOYA_ASYNC_EVENT_ID_PSOC,
298         GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
299         GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
300         GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
301         GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
302         GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
303         GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
304         GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
305         GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
306         GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
307         GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
308         GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
309         GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
310         GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
311         GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
312         GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
313         GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
314         GOYA_ASYNC_EVENT_ID_TPC0_QM,
315         GOYA_ASYNC_EVENT_ID_TPC1_QM,
316         GOYA_ASYNC_EVENT_ID_TPC2_QM,
317         GOYA_ASYNC_EVENT_ID_TPC3_QM,
318         GOYA_ASYNC_EVENT_ID_TPC4_QM,
319         GOYA_ASYNC_EVENT_ID_TPC5_QM,
320         GOYA_ASYNC_EVENT_ID_TPC6_QM,
321         GOYA_ASYNC_EVENT_ID_TPC7_QM,
322         GOYA_ASYNC_EVENT_ID_MME_QM,
323         GOYA_ASYNC_EVENT_ID_MME_CMDQ,
324         GOYA_ASYNC_EVENT_ID_DMA0_QM,
325         GOYA_ASYNC_EVENT_ID_DMA1_QM,
326         GOYA_ASYNC_EVENT_ID_DMA2_QM,
327         GOYA_ASYNC_EVENT_ID_DMA3_QM,
328         GOYA_ASYNC_EVENT_ID_DMA4_QM,
329         GOYA_ASYNC_EVENT_ID_DMA0_CH,
330         GOYA_ASYNC_EVENT_ID_DMA1_CH,
331         GOYA_ASYNC_EVENT_ID_DMA2_CH,
332         GOYA_ASYNC_EVENT_ID_DMA3_CH,
333         GOYA_ASYNC_EVENT_ID_DMA4_CH,
334         GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
335         GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
336         GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
337         GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
338         GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
339         GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
340         GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
341         GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
342         GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
343         GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
344         GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
345         GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
346         GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
347         GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
348         GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
349         GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
350         GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
351 };
352
353 static s64 goya_state_dump_specs_props[SP_MAX] = {0};
354
355 static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
356 static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
357 static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
358 static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
359
360 int goya_set_fixed_properties(struct hl_device *hdev)
361 {
362         struct asic_fixed_properties *prop = &hdev->asic_prop;
363         int i;
364
365         prop->max_queues = GOYA_QUEUE_ID_SIZE;
366         prop->hw_queues_props = kcalloc(prop->max_queues,
367                         sizeof(struct hw_queue_properties),
368                         GFP_KERNEL);
369
370         if (!prop->hw_queues_props)
371                 return -ENOMEM;
372
373         for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
374                 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
375                 prop->hw_queues_props[i].driver_only = 0;
376                 prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
377         }
378
379         for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
380                 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
381                 prop->hw_queues_props[i].driver_only = 1;
382                 prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_KERNEL;
383         }
384
385         for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
386                         NUMBER_OF_INT_HW_QUEUES; i++) {
387                 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
388                 prop->hw_queues_props[i].driver_only = 0;
389                 prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER;
390         }
391
392         prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
393         prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
394
395         prop->dram_base_address = DRAM_PHYS_BASE;
396         prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
397         prop->dram_end_address = prop->dram_base_address + prop->dram_size;
398         prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
399
400         prop->sram_base_address = SRAM_BASE_ADDR;
401         prop->sram_size = SRAM_SIZE;
402         prop->sram_end_address = prop->sram_base_address + prop->sram_size;
403         prop->sram_user_base_address = prop->sram_base_address +
404                                                 SRAM_USER_BASE_OFFSET;
405
406         prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
407         prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR;
408         if (hdev->pldm)
409                 prop->mmu_pgt_size = 0x800000; /* 8MB */
410         else
411                 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
412         prop->mmu_pte_size = HL_PTE_SIZE;
413         prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
414         prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
415         prop->dram_page_size = PAGE_SIZE_2MB;
416         prop->dram_supports_virtual_memory = true;
417
418         prop->dmmu.hop0_shift = MMU_V1_0_HOP0_SHIFT;
419         prop->dmmu.hop1_shift = MMU_V1_0_HOP1_SHIFT;
420         prop->dmmu.hop2_shift = MMU_V1_0_HOP2_SHIFT;
421         prop->dmmu.hop3_shift = MMU_V1_0_HOP3_SHIFT;
422         prop->dmmu.hop4_shift = MMU_V1_0_HOP4_SHIFT;
423         prop->dmmu.hop0_mask = MMU_V1_0_HOP0_MASK;
424         prop->dmmu.hop1_mask = MMU_V1_0_HOP1_MASK;
425         prop->dmmu.hop2_mask = MMU_V1_0_HOP2_MASK;
426         prop->dmmu.hop3_mask = MMU_V1_0_HOP3_MASK;
427         prop->dmmu.hop4_mask = MMU_V1_0_HOP4_MASK;
428         prop->dmmu.start_addr = VA_DDR_SPACE_START;
429         prop->dmmu.end_addr = VA_DDR_SPACE_END;
430         prop->dmmu.page_size = PAGE_SIZE_2MB;
431         prop->dmmu.num_hops = MMU_ARCH_5_HOPS;
432         prop->dmmu.last_mask = LAST_MASK;
433
434         /* shifts and masks are the same in PMMU and DMMU */
435         memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
436         prop->pmmu.start_addr = VA_HOST_SPACE_START;
437         prop->pmmu.end_addr = VA_HOST_SPACE_END;
438         prop->pmmu.page_size = PAGE_SIZE_4KB;
439         prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
440         prop->pmmu.last_mask = LAST_MASK;
441
442         /* PMMU and HPMMU are the same except for the page size */
443         memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
444         prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
445
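        /*
         * With this 5-hop layout, each hop index of a virtual address is
         * derived as (virt_addr & hop_mask) >> hop_shift; every hop table
         * holds 512 PTEs, so each hop consumes 9 VA bits. For example
         * (illustrative):
         *
         *	hop3_idx = (va & prop->dmmu.hop3_mask) >> prop->dmmu.hop3_shift;
         */
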
446         prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
447         prop->cfg_size = CFG_SIZE;
448         prop->max_asid = MAX_ASID;
449         prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
450         prop->high_pll = PLL_HIGH_DEFAULT;
451         prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
452         prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
453         prop->max_power_default = MAX_POWER_DEFAULT;
454         prop->dc_power_default = DC_POWER_DEFAULT;
455         prop->tpc_enabled_mask = TPC_ENABLED_MASK;
456         prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
457         prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
458
459         strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
460                 CARD_NAME_MAX_LEN);
461
462         prop->max_pending_cs = GOYA_MAX_PENDING_CS;
463
464         prop->first_available_user_msix_interrupt = USHRT_MAX;
465
466         for (i = 0 ; i < HL_MAX_DCORES ; i++)
467                 prop->first_available_cq[i] = USHRT_MAX;
468
469         prop->fw_cpu_boot_dev_sts0_valid = false;
470         prop->fw_cpu_boot_dev_sts1_valid = false;
471         prop->hard_reset_done_by_fw = false;
472         prop->gic_interrupts_enable = true;
473
474         prop->server_type = HL_SERVER_TYPE_UNKNOWN;
475
476         prop->clk_pll_index = HL_GOYA_MME_PLL;
477
478         prop->use_get_power_for_reset_history = true;
479
480         return 0;
481 }
482
483 /*
484  * goya_pci_bars_map - Map PCI BARS of Goya device
485  *
486  * @hdev: pointer to hl_device structure
487  *
488  * Request PCI regions and map them to kernel virtual addresses.
489  * Returns 0 on success
490  *
491  */
492 static int goya_pci_bars_map(struct hl_device *hdev)
493 {
494         static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"};
495         bool is_wc[3] = {false, false, true};
496         int rc;
497
498         rc = hl_pci_bars_map(hdev, name, is_wc);
499         if (rc)
500                 return rc;
501
502         hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
503                         (CFG_BASE - SRAM_BASE_ADDR);
504
505         return 0;
506 }
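
/*
 * With rmmio pointing at CFG_BASE inside the SRAM/CFG BAR, the driver's
 * register accessors reduce to plain MMIO on this mapping, conceptually:
 *
 *	RREG32(reg)    -> readl(hdev->rmmio + (reg));
 *	WREG32(reg, v) -> writel(v, hdev->rmmio + (reg));
 */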
507
508 static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
509 {
510         struct goya_device *goya = hdev->asic_specific;
511         struct hl_inbound_pci_region pci_region;
512         u64 old_addr = addr;
513         int rc;
514
515         if ((goya) && (goya->ddr_bar_cur_addr == addr))
516                 return old_addr;
517
518         /* Inbound Region 1 - Bar 4 - Point to DDR */
519         pci_region.mode = PCI_BAR_MATCH_MODE;
520         pci_region.bar = DDR_BAR_ID;
521         pci_region.addr = addr;
522         rc = hl_pci_set_inbound_region(hdev, 1, &pci_region);
523         if (rc)
524                 return U64_MAX;
525
526         if (goya) {
527                 old_addr = goya->ddr_bar_cur_addr;
528                 goya->ddr_bar_cur_addr = addr;
529         }
530
531         return old_addr;
532 }
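
/*
 * Callers use the returned value to restore the previous window, e.g. to
 * peek a DRAM address through the BAR (illustrative sketch; bar_base is
 * assumed to be aligned to the BAR size):
 *
 *	old_base = goya_set_ddr_bar_base(hdev, bar_base);
 *	if (old_base == U64_MAX)
 *		return -EIO;
 *	val = readl(hdev->pcie_bar[DDR_BAR_ID] + (addr - bar_base));
 *	goya_set_ddr_bar_base(hdev, old_base);
 */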
533
534 /*
535  * goya_init_iatu - Initialize the iATU unit inside the PCI controller
536  *
537  * @hdev: pointer to hl_device structure
538  *
539  * This is needed in case the firmware doesn't initialize the iATU
540  *
541  */
542 static int goya_init_iatu(struct hl_device *hdev)
543 {
544         struct hl_inbound_pci_region inbound_region;
545         struct hl_outbound_pci_region outbound_region;
546         int rc;
547
548         if (hdev->asic_prop.iatu_done_by_fw)
549                 return 0;
550
551         /* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
552         inbound_region.mode = PCI_BAR_MATCH_MODE;
553         inbound_region.bar = SRAM_CFG_BAR_ID;
554         inbound_region.addr = SRAM_BASE_ADDR;
555         rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
556         if (rc)
557                 goto done;
558
559         /* Inbound Region 1 - Bar 4 - Point to DDR */
560         inbound_region.mode = PCI_BAR_MATCH_MODE;
561         inbound_region.bar = DDR_BAR_ID;
562         inbound_region.addr = DRAM_PHYS_BASE;
563         rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
564         if (rc)
565                 goto done;
566
567         hdev->asic_funcs->set_dma_mask_from_fw(hdev);
568
569         /* Outbound Region 0 - Point to Host  */
570         outbound_region.addr = HOST_PHYS_BASE;
571         outbound_region.size = HOST_PHYS_SIZE;
572         rc = hl_pci_set_outbound_region(hdev, &outbound_region);
573
574 done:
575         return rc;
576 }
577
578 static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
579 {
580         return RREG32(mmHW_STATE);
581 }
582
583 /*
584  * goya_early_init - GOYA early initialization code
585  *
586  * @hdev: pointer to hl_device structure
587  *
588  * Verify PCI bars
589  * Set DMA masks
590  * PCI controller initialization
591  * Map PCI bars
592  *
593  */
594 static int goya_early_init(struct hl_device *hdev)
595 {
596         struct asic_fixed_properties *prop = &hdev->asic_prop;
597         struct pci_dev *pdev = hdev->pdev;
598         u32 fw_boot_status, val;
599         int rc;
600
601         rc = goya_set_fixed_properties(hdev);
602         if (rc) {
603                 dev_err(hdev->dev, "Failed to get fixed properties\n");
604                 return rc;
605         }
606
607         /* Check BAR sizes */
608         if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
609                 dev_err(hdev->dev,
610                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
611                         SRAM_CFG_BAR_ID,
612                         (unsigned long long) pci_resource_len(pdev,
613                                                         SRAM_CFG_BAR_ID),
614                         CFG_BAR_SIZE);
615                 rc = -ENODEV;
616                 goto free_queue_props;
617         }
618
619         if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
620                 dev_err(hdev->dev,
621                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
622                         MSIX_BAR_ID,
623                         (unsigned long long) pci_resource_len(pdev,
624                                                                 MSIX_BAR_ID),
625                         MSIX_BAR_SIZE);
626                 rc = -ENODEV;
627                 goto free_queue_props;
628         }
629
630         prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
631         hdev->dram_pci_bar_start = pci_resource_start(pdev, DDR_BAR_ID);
632
633         /* If FW security is enabled at this point it means no access to ELBI */
634         if (hdev->asic_prop.fw_security_enabled) {
635                 hdev->asic_prop.iatu_done_by_fw = true;
636                 goto pci_init;
637         }
638
639         rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
640                                 &fw_boot_status);
641         if (rc)
642                 goto free_queue_props;
643
644         /* Check whether FW is configuring iATU */
645         if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
646                         (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
647                 hdev->asic_prop.iatu_done_by_fw = true;
648
649 pci_init:
650         rc = hl_pci_init(hdev);
651         if (rc)
652                 goto free_queue_props;
653
654         /* Before continuing in the initialization, we need to read the preboot
655          * version to determine whether we run with a security-enabled firmware
656          */
657         rc = hl_fw_read_preboot_status(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
658                                         mmCPU_BOOT_DEV_STS0,
659                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
660                                         mmCPU_BOOT_ERR1,
661                                         GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
662         if (rc) {
663                 if (hdev->reset_on_preboot_fail)
664                         hdev->asic_funcs->hw_fini(hdev, true, false);
665                 goto pci_fini;
666         }
667
668         if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
669                 dev_info(hdev->dev,
670                         "H/W state is dirty, must reset before initializing\n");
671                 hdev->asic_funcs->hw_fini(hdev, true, false);
672         }
673
674         if (!hdev->pldm) {
675                 val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
676                 if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
677                         dev_warn(hdev->dev,
678                                 "PCI strap is not configured correctly, PCI bus errors may occur\n");
679         }
680
681         return 0;
682
683 pci_fini:
684         hl_pci_fini(hdev);
685 free_queue_props:
686         kfree(hdev->asic_prop.hw_queues_props);
687         return rc;
688 }
689
690 /*
691  * goya_early_fini - GOYA early finalization code
692  *
693  * @hdev: pointer to hl_device structure
694  *
695  * Unmap PCI bars
696  *
697  */
698 static int goya_early_fini(struct hl_device *hdev)
699 {
700         kfree(hdev->asic_prop.hw_queues_props);
701         hl_pci_fini(hdev);
702
703         return 0;
704 }
705
706 static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
707 {
708         /* mask to zero the MMBP and ASID bits */
709         WREG32_AND(reg, ~0x7FF);
710         WREG32_OR(reg, asid);
711 }
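
/*
 * The 0x7FF mask covers 11 bits: the ASID field (10 bits, since MAX_ASID is
 * 1024) plus the MMBP (MMU bypass) bit, so the two writes clear both and
 * then install the new ASID with bypass disabled.
 */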
712
713 static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
714 {
715         struct goya_device *goya = hdev->asic_specific;
716
717         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
718                 return;
719
720         if (secure)
721                 WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
722         else
723                 WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);
724
725         RREG32(mmDMA_QM_0_GLBL_PROT);
726 }
727
728 /*
729  * goya_fetch_psoc_frequency - Fetch PSOC frequency values
730  *
731  * @hdev: pointer to hl_device structure
732  *
733  */
734 static void goya_fetch_psoc_frequency(struct hl_device *hdev)
735 {
736         struct asic_fixed_properties *prop = &hdev->asic_prop;
737         u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
738         u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
739         int rc;
740
741         if (hdev->asic_prop.fw_security_enabled) {
742                 struct goya_device *goya = hdev->asic_specific;
743
744                 if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
745                         return;
746
747                 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL,
748                                 pll_freq_arr);
749
750                 if (rc)
751                         return;
752
753                 freq = pll_freq_arr[1];
754         } else {
755                 div_fctr = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
756                 div_sel = RREG32(mmPSOC_PCI_PLL_DIV_SEL_1);
757                 nr = RREG32(mmPSOC_PCI_PLL_NR);
758                 nf = RREG32(mmPSOC_PCI_PLL_NF);
759                 od = RREG32(mmPSOC_PCI_PLL_OD);
760
761                 if (div_sel == DIV_SEL_REF_CLK ||
762                                 div_sel == DIV_SEL_DIVIDED_REF) {
763                         if (div_sel == DIV_SEL_REF_CLK)
764                                 freq = PLL_REF_CLK;
765                         else
766                                 freq = PLL_REF_CLK / (div_fctr + 1);
767                 } else if (div_sel == DIV_SEL_PLL_CLK ||
768                                 div_sel == DIV_SEL_DIVIDED_PLL) {
769                         pll_clk = PLL_REF_CLK * (nf + 1) /
770                                         ((nr + 1) * (od + 1));
771                         if (div_sel == DIV_SEL_PLL_CLK)
772                                 freq = pll_clk;
773                         else
774                                 freq = pll_clk / (div_fctr + 1);
775                 } else {
776                         dev_warn(hdev->dev,
777                                 "Received invalid div select value: %d",
778                                 div_sel);
779                         freq = 0;
780                 }
781         }
782
783         prop->psoc_timestamp_frequency = freq;
784         prop->psoc_pci_pll_nr = nr;
785         prop->psoc_pci_pll_nf = nf;
786         prop->psoc_pci_pll_od = od;
787         prop->psoc_pci_pll_div_factor = div_fctr;
788 }
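
/*
 * Worked example of the PLL arithmetic above, assuming the 50 MHz reference
 * clock (values are illustrative): nf = 99, nr = 0, od = 1 gives
 * pll_clk = 50 * 100 / (1 * 2) = 2500 MHz; with div_fctr = 1 in
 * DIV_SEL_DIVIDED_PLL mode, freq = 2500 / 2 = 1250 MHz.
 */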
789
790 /*
791  * goya_set_frequency - set the frequency of the device
792  *
793  * @hdev: pointer to habanalabs device structure
794  * @freq: the new frequency value
795  *
796  * Change the frequency if needed. This function has no protection against
797  * concurrency, therefore it is assumed that the calling function has protected
798  * itself against the case of calling this function from multiple threads with
799  * different values
800  *
801  * Returns 0 if no change was done, otherwise returns 1
802  */
803 int goya_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
804 {
805         struct goya_device *goya = hdev->asic_specific;
806
807         if ((goya->pm_mng_profile == PM_MANUAL) ||
808                         (goya->curr_pll_profile == freq))
809                 return 0;
810
811         dev_dbg(hdev->dev, "Changing device frequency to %s\n",
812                 freq == PLL_HIGH ? "high" : "low");
813
814         goya_set_pll_profile(hdev, freq);
815
816         goya->curr_pll_profile = freq;
817
818         return 1;
819 }
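
/*
 * A caller therefore serializes invocations itself, e.g. (as
 * goya_set_freq_to_low_job() below does):
 *
 *	mutex_lock(&hdev->fpriv_list_lock);
 *	if (!hdev->is_compute_ctx_active)
 *		goya_set_frequency(hdev, PLL_LOW);
 *	mutex_unlock(&hdev->fpriv_list_lock);
 */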
820
821 static void goya_set_freq_to_low_job(struct work_struct *work)
822 {
823         struct goya_work_freq *goya_work = container_of(work,
824                                                 struct goya_work_freq,
825                                                 work_freq.work);
826         struct hl_device *hdev = goya_work->hdev;
827
828         mutex_lock(&hdev->fpriv_list_lock);
829
830         if (!hdev->is_compute_ctx_active)
831                 goya_set_frequency(hdev, PLL_LOW);
832
833         mutex_unlock(&hdev->fpriv_list_lock);
834
835         schedule_delayed_work(&goya_work->work_freq,
836                         usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
837 }
838
839 int goya_late_init(struct hl_device *hdev)
840 {
841         struct asic_fixed_properties *prop = &hdev->asic_prop;
842         struct goya_device *goya = hdev->asic_specific;
843         int rc;
844
845         goya_fetch_psoc_frequency(hdev);
846
847         rc = goya_mmu_clear_pgt_range(hdev);
848         if (rc) {
849                 dev_err(hdev->dev,
850                         "Failed to clear MMU page tables range %d\n", rc);
851                 return rc;
852         }
853
854         rc = goya_mmu_set_dram_default_page(hdev);
855         if (rc) {
856                 dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
857                 return rc;
858         }
859
860         rc = goya_mmu_add_mappings_for_device_cpu(hdev);
861         if (rc)
862                 return rc;
863
864         rc = goya_init_cpu_queues(hdev);
865         if (rc)
866                 return rc;
867
868         rc = goya_test_cpu_queue(hdev);
869         if (rc)
870                 return rc;
871
872         rc = goya_cpucp_info_get(hdev);
873         if (rc) {
874                 dev_err(hdev->dev, "Failed to get cpucp info %d\n", rc);
875                 return rc;
876         }
877
878         /* Now that we have the DRAM size in ASIC prop, we need to check
879          * its size and configure the DMA_IF DDR wrap protection (which is in
880          * the MMU block) accordingly. The value is the log2 of the DRAM size
881          * (e.g. ilog2 of an 8GB DRAM is 33). */
882         WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));
883
884         rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS);
885         if (rc) {
886                 dev_err(hdev->dev,
887                         "Failed to enable PCI access from CPU %d\n", rc);
888                 return rc;
889         }
890
891         /* force setting to low frequency */
892         goya->curr_pll_profile = PLL_LOW;
893
894         goya->pm_mng_profile = PM_AUTO;
895
896         goya_set_pll_profile(hdev, PLL_LOW);
897
898         schedule_delayed_work(&goya->goya_work->work_freq,
899                 usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
900
901         return 0;
902 }
903
904 /*
905  * goya_late_fini - GOYA late tear-down code
906  *
907  * @hdev: pointer to hl_device structure
908  *
909  * Free sensors allocated structures
910  */
911 void goya_late_fini(struct hl_device *hdev)
912 {
913         const struct hwmon_channel_info **channel_info_arr;
914         struct goya_device *goya = hdev->asic_specific;
915         int i = 0;
916
917         cancel_delayed_work_sync(&goya->goya_work->work_freq);
918
919         if (!hdev->hl_chip_info->info)
920                 return;
921
922         channel_info_arr = hdev->hl_chip_info->info;
923
924         while (channel_info_arr[i]) {
925                 kfree(channel_info_arr[i]->config);
926                 kfree(channel_info_arr[i]);
927                 i++;
928         }
929
930         kfree(channel_info_arr);
931
932         hdev->hl_chip_info->info = NULL;
933 }
934
935 static void goya_set_pci_memory_regions(struct hl_device *hdev)
936 {
937         struct asic_fixed_properties *prop = &hdev->asic_prop;
938         struct pci_mem_region *region;
939
940         /* CFG */
941         region = &hdev->pci_mem_region[PCI_REGION_CFG];
942         region->region_base = CFG_BASE;
943         region->region_size = CFG_SIZE;
944         region->offset_in_bar = CFG_BASE - SRAM_BASE_ADDR;
945         region->bar_size = CFG_BAR_SIZE;
946         region->bar_id = SRAM_CFG_BAR_ID;
947         region->used = 1;
948
949         /* SRAM */
950         region = &hdev->pci_mem_region[PCI_REGION_SRAM];
951         region->region_base = SRAM_BASE_ADDR;
952         region->region_size = SRAM_SIZE;
953         region->offset_in_bar = 0;
954         region->bar_size = CFG_BAR_SIZE;
955         region->bar_id = SRAM_CFG_BAR_ID;
956         region->used = 1;
957
958         /* DRAM */
959         region = &hdev->pci_mem_region[PCI_REGION_DRAM];
960         region->region_base = DRAM_PHYS_BASE;
961         region->region_size = hdev->asic_prop.dram_size;
962         region->offset_in_bar = 0;
963         region->bar_size = prop->dram_pci_bar_size;
964         region->bar_id = DDR_BAR_ID;
965         region->used = 1;
966 }
967
968 /*
969  * goya_sw_init - Goya software initialization code
970  *
971  * @hdev: pointer to hl_device structure
972  *
973  */
974 static int goya_sw_init(struct hl_device *hdev)
975 {
976         struct goya_device *goya;
977         int rc;
978
979         /* Allocate device structure */
980         goya = kzalloc(sizeof(*goya), GFP_KERNEL);
981         if (!goya)
982                 return -ENOMEM;
983
984         /* according to goya_init_iatu */
985         goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;
986
987         goya->mme_clk = GOYA_PLL_FREQ_LOW;
988         goya->tpc_clk = GOYA_PLL_FREQ_LOW;
989         goya->ic_clk = GOYA_PLL_FREQ_LOW;
990
991         hdev->asic_specific = goya;
992
993         /* Create DMA pool for small allocations */
994         hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
995                         &hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
996         if (!hdev->dma_pool) {
997                 dev_err(hdev->dev, "failed to create DMA pool\n");
998                 rc = -ENOMEM;
999                 goto free_goya_device;
1000         }
1001
1002         hdev->cpu_accessible_dma_mem =
1003                         hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1004                                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1005                                         &hdev->cpu_accessible_dma_address,
1006                                         GFP_KERNEL | __GFP_ZERO);
1007
1008         if (!hdev->cpu_accessible_dma_mem) {
1009                 rc = -ENOMEM;
1010                 goto free_dma_pool;
1011         }
1012
1013         dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
1014                 &hdev->cpu_accessible_dma_address);
1015
1016         hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1017         if (!hdev->cpu_accessible_dma_pool) {
1018                 dev_err(hdev->dev,
1019                         "Failed to create CPU accessible DMA pool\n");
1020                 rc = -ENOMEM;
1021                 goto free_cpu_dma_mem;
1022         }
1023
1024         rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1025                                 (uintptr_t) hdev->cpu_accessible_dma_mem,
1026                                 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1027         if (rc) {
1028                 dev_err(hdev->dev,
1029                         "Failed to add memory to CPU accessible DMA pool\n");
1030                 rc = -EFAULT;
1031                 goto free_cpu_accessible_dma_pool;
1032         }
1033
1034         spin_lock_init(&goya->hw_queues_lock);
1035         hdev->supports_coresight = true;
1036         hdev->asic_prop.supports_soft_reset = true;
1037         hdev->asic_prop.allow_inference_soft_reset = true;
1038         hdev->supports_wait_for_multi_cs = false;
1039
1040         hdev->asic_funcs->set_pci_memory_regions(hdev);
1041
1042         goya->goya_work = kmalloc(sizeof(struct goya_work_freq), GFP_KERNEL);
1043         if (!goya->goya_work) {
1044                 rc = -ENOMEM;
1045                 goto free_cpu_accessible_dma_pool;
1046         }
1047
1048         goya->goya_work->hdev = hdev;
1049         INIT_DELAYED_WORK(&goya->goya_work->work_freq, goya_set_freq_to_low_job);
1050
1051         return 0;
1052
1053 free_cpu_accessible_dma_pool:
1054         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1055 free_cpu_dma_mem:
1056         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1057                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1058                         hdev->cpu_accessible_dma_mem,
1059                         hdev->cpu_accessible_dma_address);
1060 free_dma_pool:
1061         dma_pool_destroy(hdev->dma_pool);
1062 free_goya_device:
1063         kfree(goya);
1064
1065         return rc;
1066 }
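
/*
 * The CPU-accessible pool set up above is later carved into small buffers
 * with the genalloc API; simplified sketch of how it is used:
 *
 *	vaddr = (void *) gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
 *	if (!vaddr)
 *		return NULL;
 *	(buffer is used for driver <-> device-CPU messages)
 *	gen_pool_free(hdev->cpu_accessible_dma_pool, (uintptr_t) vaddr, size);
 */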
1067
1068 /*
1069  * goya_sw_fini - Goya software tear-down code
1070  *
1071  * @hdev: pointer to hl_device structure
1072  *
1073  */
1074 static int goya_sw_fini(struct hl_device *hdev)
1075 {
1076         struct goya_device *goya = hdev->asic_specific;
1077
1078         gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1079
1080         hdev->asic_funcs->asic_dma_free_coherent(hdev,
1081                         HL_CPU_ACCESSIBLE_MEM_SIZE,
1082                         hdev->cpu_accessible_dma_mem,
1083                         hdev->cpu_accessible_dma_address);
1084
1085         dma_pool_destroy(hdev->dma_pool);
1086
1087         kfree(goya->goya_work);
1088         kfree(goya);
1089
1090         return 0;
1091 }
1092
1093 static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
1094                 dma_addr_t bus_address)
1095 {
1096         struct goya_device *goya = hdev->asic_specific;
1097         u32 mtr_base_lo, mtr_base_hi;
1098         u32 so_base_lo, so_base_hi;
1099         u32 gic_base_lo, gic_base_hi;
1100         u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);
1101         u32 dma_err_cfg = QMAN_DMA_ERR_MSG_EN;
1102
1103         mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1104         mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1105         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1106         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1107
1108         gic_base_lo =
1109                 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1110         gic_base_hi =
1111                 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1112
1113         WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
1114         WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));
1115
1116         WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
1117         WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
1118         WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);
1119
1120         WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1121         WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1122         WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1123         WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1124         WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1125         WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1126         WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
1127                         GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);
1128
1129         /* PQ has buffer of 2 cache lines, while CQ has 8 lines */
1130         WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
1131         WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);
1132
1133         if (goya->hw_cap_initialized & HW_CAP_MMU)
1134                 WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
1135         else
1136                 WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);
1137
1138         if (hdev->stop_on_err)
1139                 dma_err_cfg |= 1 << DMA_QM_0_GLBL_ERR_CFG_DMA_STOP_ON_ERR_SHIFT;
1140
1141         WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, dma_err_cfg);
1142         WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
1143 }
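
/*
 * Note the PQ size register is programmed with the log2 of the queue length:
 * with HL_QUEUE_LENGTH = 4096 entries, ilog2(4096) = 12 is written, and the
 * queue pointers (PI/CI) then wrap modulo that length.
 */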
1144
1145 static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
1146 {
1147         u32 gic_base_lo, gic_base_hi;
1148         u64 sob_addr;
1149         u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1);
1150
1151         gic_base_lo =
1152                 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1153         gic_base_hi =
1154                 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1155
1156         WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo);
1157         WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi);
1158         WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off,
1159                         GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id);
1160
1161         if (dma_id)
1162                 sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
1163                                 (dma_id - 1) * 4;
1164         else
1165                 sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
1166
1167         WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + reg_off, lower_32_bits(sob_addr));
             WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr));
1168         WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
1169 }
1170
1171 /*
1172  * goya_init_dma_qmans - Initialize QMAN DMA registers
1173  *
1174  * @hdev: pointer to hl_device structure
1175  *
1176  * Initialize the H/W registers of the QMAN DMA channels
1177  *
1178  */
1179 void goya_init_dma_qmans(struct hl_device *hdev)
1180 {
1181         struct goya_device *goya = hdev->asic_specific;
1182         struct hl_hw_queue *q;
1183         int i;
1184
1185         if (goya->hw_cap_initialized & HW_CAP_DMA)
1186                 return;
1187
1188         q = &hdev->kernel_queues[0];
1189
1190         for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
1191                 q->cq_id = q->msi_vec = i;
1192                 goya_init_dma_qman(hdev, i, q->bus_address);
1193                 goya_init_dma_ch(hdev, i);
1194         }
1195
1196         goya->hw_cap_initialized |= HW_CAP_DMA;
1197 }
1198
1199 /*
1200  * goya_disable_external_queues - Disable external queues
1201  *
1202  * @hdev: pointer to hl_device structure
1203  *
1204  */
1205 static void goya_disable_external_queues(struct hl_device *hdev)
1206 {
1207         struct goya_device *goya = hdev->asic_specific;
1208
1209         if (!(goya->hw_cap_initialized & HW_CAP_DMA))
1210                 return;
1211
1212         WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
1213         WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
1214         WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
1215         WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
1216         WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
1217 }
1218
1219 static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
1220                                 u32 cp_sts_reg, u32 glbl_sts0_reg)
1221 {
1222         int rc;
1223         u32 status;
1224
1225         /* use the values of TPC0 as they are all the same */
1226
1227         WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
1228
1229         status = RREG32(cp_sts_reg);
1230         if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
1231                 rc = hl_poll_timeout(
1232                         hdev,
1233                         cp_sts_reg,
1234                         status,
1235                         !(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
1236                         1000,
1237                         QMAN_FENCE_TIMEOUT_USEC);
1238
1239                 /* if QMAN is stuck in fence no need to check for stop */
1240                 if (rc)
1241                         return 0;
1242         }
1243
1244         rc = hl_poll_timeout(
1245                 hdev,
1246                 glbl_sts0_reg,
1247                 status,
1248                 (status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
1249                 1000,
1250                 QMAN_STOP_TIMEOUT_USEC);
1251
1252         if (rc) {
1253                 dev_err(hdev->dev,
1254                         "Timeout while waiting for QMAN to stop\n");
1255                 return -EINVAL;
1256         }
1257
1258         return 0;
1259 }
1260
1261 /*
1262  * goya_stop_external_queues - Stop external queues
1263  *
1264  * @hdev: pointer to hl_device structure
1265  *
1266  * Returns 0 on success
1267  *
1268  */
1269 static int goya_stop_external_queues(struct hl_device *hdev)
1270 {
1271         int rc, retval = 0;
1272
1273         struct goya_device *goya = hdev->asic_specific;
1274
1275         if (!(goya->hw_cap_initialized & HW_CAP_DMA))
1276                 return retval;
1277
1278         rc = goya_stop_queue(hdev,
1279                         mmDMA_QM_0_GLBL_CFG1,
1280                         mmDMA_QM_0_CP_STS,
1281                         mmDMA_QM_0_GLBL_STS0);
1282
1283         if (rc) {
1284                 dev_err(hdev->dev, "failed to stop DMA QMAN 0\n");
1285                 retval = -EIO;
1286         }
1287
1288         rc = goya_stop_queue(hdev,
1289                         mmDMA_QM_1_GLBL_CFG1,
1290                         mmDMA_QM_1_CP_STS,
1291                         mmDMA_QM_1_GLBL_STS0);
1292
1293         if (rc) {
1294                 dev_err(hdev->dev, "failed to stop DMA QMAN 1\n");
1295                 retval = -EIO;
1296         }
1297
1298         rc = goya_stop_queue(hdev,
1299                         mmDMA_QM_2_GLBL_CFG1,
1300                         mmDMA_QM_2_CP_STS,
1301                         mmDMA_QM_2_GLBL_STS0);
1302
1303         if (rc) {
1304                 dev_err(hdev->dev, "failed to stop DMA QMAN 2\n");
1305                 retval = -EIO;
1306         }
1307
1308         rc = goya_stop_queue(hdev,
1309                         mmDMA_QM_3_GLBL_CFG1,
1310                         mmDMA_QM_3_CP_STS,
1311                         mmDMA_QM_3_GLBL_STS0);
1312
1313         if (rc) {
1314                 dev_err(hdev->dev, "failed to stop DMA QMAN 3\n");
1315                 retval = -EIO;
1316         }
1317
1318         rc = goya_stop_queue(hdev,
1319                         mmDMA_QM_4_GLBL_CFG1,
1320                         mmDMA_QM_4_CP_STS,
1321                         mmDMA_QM_4_GLBL_STS0);
1322
1323         if (rc) {
1324                 dev_err(hdev->dev, "failed to stop DMA QMAN 4\n");
1325                 retval = -EIO;
1326         }
1327
1328         return retval;
1329 }
1330
1331 /*
1332  * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
1333  *
1334  * @hdev: pointer to hl_device structure
1335  *
1336  * Returns 0 on success
1337  *
1338  */
1339 int goya_init_cpu_queues(struct hl_device *hdev)
1340 {
1341         struct goya_device *goya = hdev->asic_specific;
1342         struct asic_fixed_properties *prop = &hdev->asic_prop;
1343         struct hl_eq *eq;
1344         u32 status;
1345         struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
1346         int err;
1347
1348         if (!hdev->cpu_queues_enable)
1349                 return 0;
1350
1351         if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
1352                 return 0;
1353
1354         eq = &hdev->event_queue;
1355
1356         WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
1357         WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
1358
1359         WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
1360         WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
1361
1362         WREG32(mmCPU_CQ_BASE_ADDR_LOW,
1363                         lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
1364         WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
1365                         upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
1366
1367         WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
1368         WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
1369         WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
1370
1371         /* Used for EQ CI */
1372         WREG32(mmCPU_EQ_CI, 0);
1373
1374         WREG32(mmCPU_IF_PF_PQ_PI, 0);
1375
1376         WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);
1377
1378         WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
1379                         GOYA_ASYNC_EVENT_ID_PI_UPDATE);
1380
1381         err = hl_poll_timeout(
1382                 hdev,
1383                 mmCPU_PQ_INIT_STATUS,
1384                 status,
1385                 (status == PQ_INIT_STATUS_READY_FOR_HOST),
1386                 1000,
1387                 GOYA_CPU_TIMEOUT_USEC);
1388
1389         if (err) {
1390                 dev_err(hdev->dev,
1391                         "Failed to setup communication with device CPU\n");
1392                 return -EIO;
1393         }
1394
1395         /* update FW application security bits */
1396         if (prop->fw_cpu_boot_dev_sts0_valid)
1397                 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
1398
1399         if (prop->fw_cpu_boot_dev_sts1_valid)
1400                 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
1401
1402         goya->hw_cap_initialized |= HW_CAP_CPU_Q;
1403         return 0;
1404 }
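
/*
 * Summary of the handshake above: the driver publishes the PQ/EQ/CQ base
 * addresses and lengths, marks PQ_INIT_STATUS_READY_FOR_CP, kicks the device
 * CPU through the GIC doorbell, and then polls until the firmware flips the
 * status to PQ_INIT_STATUS_READY_FOR_HOST (or GOYA_CPU_TIMEOUT_USEC expires).
 */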
1405
1406 static void goya_set_pll_refclk(struct hl_device *hdev)
1407 {
1408         WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
1409         WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
1410         WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
1411         WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);
1412
1413         WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
1414         WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
1415         WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
1416         WREG32(mmIC_PLL_DIV_SEL_3, 0x0);
1417
1418         WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
1419         WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
1420         WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
1421         WREG32(mmMC_PLL_DIV_SEL_3, 0x0);
1422
1423         WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
1424         WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
1425         WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
1426         WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);
1427
1428         WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
1429         WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
1430         WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
1431         WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);
1432
1433         WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
1434         WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
1435         WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
1436         WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);
1437
1438         WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
1439         WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
1440         WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
1441         WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
1442 }
1443
1444 static void goya_disable_clk_rlx(struct hl_device *hdev)
1445 {
1446         WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
1447         WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
1448 }
1449
1450 static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
1451 {
1452         u64 tpc_eml_address;
1453         u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
1454         int err, slm_index;
1455
1456         tpc_offset = tpc_id * 0x40000;
1457         tpc_eml_offset = tpc_id * 0x200000;
1458         tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
1459         tpc_slm_offset = tpc_eml_address + 0x100000;
1460
1461         /*
1462          * Workaround for Bug H2 #2443:
1463          * "TPC SB is not initialized on chip reset"
1464          */
1465
1466         val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
1467         if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
1468                 dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
1469                         tpc_id);
1470
1471         WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);
1472
1473         WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
1474         WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
1475         WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
1476         WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
1477         WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
1478         WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
1479         WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
1480         WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
1481         WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
1482         WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);
1483
1484         WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
1485                 1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);
1486
1487         err = hl_poll_timeout(
1488                 hdev,
1489                 mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
1490                 val,
1491                 (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
1492                 1000,
1493                 HL_DEVICE_TIMEOUT_USEC);
1494
1495         if (err)
1496                 dev_err(hdev->dev,
1497                         "Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);
1498
1499         WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
1500                 1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);
1501
1502         msleep(GOYA_RESET_WAIT_MSEC);
1503
1504         WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
1505                 ~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));
1506
1507         msleep(GOYA_RESET_WAIT_MSEC);
1508
1509         for (slm_index = 0 ; slm_index < 256 ; slm_index++)
1510                 WREG32(tpc_slm_offset + (slm_index << 2), 0);
1511
1512         val = RREG32(tpc_slm_offset);
1513 }
1514
1515 static void goya_tpc_mbist_workaround(struct hl_device *hdev)
1516 {
1517         struct goya_device *goya = hdev->asic_specific;
1518         int i;
1519
1520         if (hdev->pldm)
1521                 return;
1522
1523         if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST)
1524                 return;
1525
1526         /* Workaround for H2 #2443 */
1527
1528         for (i = 0 ; i < TPC_MAX_NUM ; i++)
1529                 _goya_tpc_mbist_workaround(hdev, i);
1530
1531         goya->hw_cap_initialized |= HW_CAP_TPC_MBIST;
1532 }
1533
/*
 * goya_init_golden_registers - Initialize golden registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the device
 *
 */
static void goya_init_golden_registers(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        u32 polynom[10], tpc_intr_mask, offset;
        int i;

        if (goya->hw_cap_initialized & HW_CAP_GOLDEN)
                return;

        polynom[0] = 0x00020080;
        polynom[1] = 0x00401000;
        polynom[2] = 0x00200800;
        polynom[3] = 0x00002000;
        polynom[4] = 0x00080200;
        polynom[5] = 0x00040100;
        polynom[6] = 0x00100400;
        polynom[7] = 0x00004000;
        polynom[8] = 0x00010000;
        polynom[9] = 0x00008000;

        /* Mask all arithmetic interrupts from TPC */
        tpc_intr_mask = 0x7FFF;

        for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) {
                WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);

                WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204);

                WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207);

                WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206);

                WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105);

                WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101);
        }

        WREG32(mmMME_STORE_MAX_CREDIT, 0x21);
        WREG32(mmMME_AGU, 0x0f0f0f10);
        WREG32(mmMME_SEI_MASK, ~0x0);

        WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
        WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
        WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
        WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
        WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
        WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701);
        WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401);
        WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401);
        WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301);
        WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
        WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
        WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105);
        WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
        WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
        WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301);
        WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401);
        WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101);
        WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101);
        WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202);
        WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101);
        WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201);
        WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701);
        WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101);
        WREG32(mmMME1_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
        WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
        WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
        WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701);
        WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201);
        WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
        WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102);
        WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
        WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
        WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707);
        WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201);
        WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
        WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
        WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102);
        WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102);
        WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102);
        WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102);
        WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102);
        WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107);
        WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106);
        WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102);
        WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102);
        WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102);
        WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102);
        WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102);
        WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702);
        WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702);
        WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602);
        WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402);
        WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202);
        WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102);
        WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401);
        WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401);
        WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401);
        WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401);
        WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401);
        WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401);
        WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
        WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
        WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
        WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
        WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
        WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107);
        WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107);
        WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
        WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
        WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
        WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
        WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
        WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501);
        WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501);
        WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301);
        WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401);
        WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101);
        WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101);
        WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101);
        WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101);
        WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101);
        WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101);
        WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101);
        WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101);

        WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
        WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
        WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101);
        WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102);
        WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
        WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202);
        WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201);
        WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201);
        WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202);
        WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101);
        WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
        WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101);

        WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
        WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101);
        WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201);
        WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102);
        WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101);
        WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202);
        WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201);
        WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201);
        WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202);
        WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
        WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
        WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101);

        WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
        WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
        WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301);
        WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102);
        WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101);
        WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301);
        WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201);
        WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201);
        WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402);
        WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
        WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
        WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401);

        WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
        WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
        WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401);
        WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
        WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101);
        WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702);
        WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201);
        WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201);
        WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602);
        WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
        WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
        WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301);

        WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101);
        WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101);
        WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501);
        WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
        WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101);
        WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602);
        WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201);
        WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201);
        WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702);
        WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
        WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
        WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501);

        WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
        WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
        WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601);
        WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
        WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
        WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702);
        WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101);
        WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101);
        WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702);
        WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
        WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101);
        WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501);

        for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) {
                WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);

                WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);

                WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
                WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
        }

        for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) {
                WREG32(mmMME1_RTR_SCRAMB_EN + offset,
                                1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT);
                WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset,
                                1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT);
        }

        for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) {
                /*
                 * Workaround for Bug H2 #2441:
                 * "ST.NOP set trace event illegal opcode"
                 */
                WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask);

                WREG32(mmTPC0_NRTR_SCRAMB_EN + offset,
                                1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
                WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
                                1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);

                WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset,
                                ICACHE_FETCH_LINE_NUM, 2);
        }

        WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
        WREG32(mmDMA_NRTR_NON_LIN_SCRAMB,
                        1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT);

        WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT);
        WREG32(mmPCI_NRTR_NON_LIN_SCRAMB,
                        1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT);

        /*
         * Workaround for H2 #HW-23 bug
         * Set DMA max outstanding read requests to 240 on DMA CH 1.
         * This limitation is still large enough to not affect Gen4 bandwidth.
         * We only need to limit that DMA channel because the user can only
         * read from the Host using DMA CH 1
         */
        WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);

        WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);

        goya->hw_cap_initialized |= HW_CAP_GOLDEN;
}

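/*
 * goya_init_mme_qman - Initialize the MME QMAN
 *
 * @hdev: pointer to hl_device structure
 *
 * Place the QMAN's PQ on SRAM, point its message bases at the sync manager
 * monitor payload and SOB registers, route QMAN errors to the GIC and
 * enable the queue.
 */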
static void goya_init_mme_qman(struct hl_device *hdev)
{
        u32 mtr_base_lo, mtr_base_hi;
        u32 so_base_lo, so_base_hi;
        u32 gic_base_lo, gic_base_hi;
        u64 qman_base_addr;

        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

        gic_base_lo =
                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
        gic_base_hi =
                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

        qman_base_addr = hdev->asic_prop.sram_base_address +
                                MME_QMAN_BASE_OFFSET;

        WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
        WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
        WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
        WREG32(mmMME_QM_PQ_PI, 0);
        WREG32(mmMME_QM_PQ_CI, 0);
        WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
        WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
        WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
        WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);

        WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
        WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
        WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
        WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);

        /* QMAN CQ has 8 cache lines */
        WREG32(mmMME_QM_CQ_CFG1, 0x00080008);

        WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
        WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);

        WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);

        WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);

        WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);

        WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
}

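/*
 * goya_init_mme_cmdq - Initialize the MME CMDQ
 *
 * @hdev: pointer to hl_device structure
 *
 * Same message-base, error-routing and enable flow as the QMAN, minus the
 * PQ setup, since the CMDQ has no PQ of its own.
 */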
static void goya_init_mme_cmdq(struct hl_device *hdev)
{
        u32 mtr_base_lo, mtr_base_hi;
        u32 so_base_lo, so_base_hi;
        u32 gic_base_lo, gic_base_hi;

        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

        gic_base_lo =
                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
        gic_base_hi =
                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

        WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
        WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
        WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
        WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);

        /* CMDQ CQ has 20 cache lines */
        WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);

        WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
        WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);

        WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);

        WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);

        WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);

        WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
}

void goya_init_mme_qmans(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        u32 so_base_lo, so_base_hi;

        if (goya->hw_cap_initialized & HW_CAP_MME)
                return;

        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

        WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
        WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);

        goya_init_mme_qman(hdev);
        goya_init_mme_cmdq(hdev);

        goya->hw_cap_initialized |= HW_CAP_MME;
}

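/*
 * goya_init_tpc_qman - Initialize a single TPC QMAN
 *
 * @hdev: pointer to hl_device structure
 * @base_off: SRAM offset at which this QMAN's PQ resides
 * @tpc_id: index of the TPC whose QMAN is initialized
 *
 * Mirrors the MME QMAN setup; the per-TPC register offset is derived from
 * the stride between the TPC0 and TPC1 register blocks.
 */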
static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
{
        u32 mtr_base_lo, mtr_base_hi;
        u32 so_base_lo, so_base_hi;
        u32 gic_base_lo, gic_base_hi;
        u64 qman_base_addr;
        u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);

        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

        gic_base_lo =
                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
        gic_base_hi =
                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

        qman_base_addr = hdev->asic_prop.sram_base_address + base_off;

        WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
        WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
        WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
        WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
        WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
        WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
        WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
        WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
        WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);

        WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
        WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
        WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
        WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);

        WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);

        WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
        WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);

        WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
                        GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);

        WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);

        WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);

        WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
}

static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
{
        u32 mtr_base_lo, mtr_base_hi;
        u32 so_base_lo, so_base_hi;
        u32 gic_base_lo, gic_base_hi;
        u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);

        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

        gic_base_lo =
                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
        gic_base_hi =
                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

        WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
        WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
        WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
        WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);

        WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);

        WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
        WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);

        WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
                        GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);

        WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);

        WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);

        WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
}

void goya_init_tpc_qmans(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        u32 so_base_lo, so_base_hi;
        u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW -
                        mmTPC0_CFG_SM_BASE_ADDRESS_LOW;
        int i;

        if (goya->hw_cap_initialized & HW_CAP_TPC)
                return;

        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

        for (i = 0 ; i < TPC_MAX_NUM ; i++) {
                WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off,
                                so_base_lo);
                WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off,
                                so_base_hi);
        }

        goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0);
        goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1);
        goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2);
        goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3);
        goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4);
        goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5);
        goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6);
        goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7);

        for (i = 0 ; i < TPC_MAX_NUM ; i++)
                goya_init_tpc_cmdq(hdev, i);

        goya->hw_cap_initialized |= HW_CAP_TPC;
}

/*
 * goya_disable_internal_queues - Disable internal queues
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_disable_internal_queues(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;

        if (!(goya->hw_cap_initialized & HW_CAP_MME))
                goto disable_tpc;

        WREG32(mmMME_QM_GLBL_CFG0, 0);
        WREG32(mmMME_CMDQ_GLBL_CFG0, 0);

disable_tpc:
        if (!(goya->hw_cap_initialized & HW_CAP_TPC))
                return;

        WREG32(mmTPC0_QM_GLBL_CFG0, 0);
        WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);

        WREG32(mmTPC1_QM_GLBL_CFG0, 0);
        WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0);

        WREG32(mmTPC2_QM_GLBL_CFG0, 0);
        WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0);

        WREG32(mmTPC3_QM_GLBL_CFG0, 0);
        WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0);

        WREG32(mmTPC4_QM_GLBL_CFG0, 0);
        WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0);

        WREG32(mmTPC5_QM_GLBL_CFG0, 0);
        WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0);

        WREG32(mmTPC6_QM_GLBL_CFG0, 0);
        WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0);

        WREG32(mmTPC7_QM_GLBL_CFG0, 0);
        WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0);
}

/*
 * goya_stop_internal_queues - Stop internal queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_stop_internal_queues(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        int rc, retval = 0;

        if (!(goya->hw_cap_initialized & HW_CAP_MME))
                goto stop_tpc;

        /*
         * Each queue (QMAN) is a separate H/W logic block. That means that
         * each QMAN can be stopped independently, and failure to stop one
         * does NOT prevent us from trying to stop the other QMANs
         */

        rc = goya_stop_queue(hdev,
                        mmMME_QM_GLBL_CFG1,
                        mmMME_QM_CP_STS,
                        mmMME_QM_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop MME QMAN\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmMME_CMDQ_GLBL_CFG1,
                        mmMME_CMDQ_CP_STS,
                        mmMME_CMDQ_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop MME CMDQ\n");
                retval = -EIO;
        }

stop_tpc:
        if (!(goya->hw_cap_initialized & HW_CAP_TPC))
                return retval;

        rc = goya_stop_queue(hdev,
                        mmTPC0_QM_GLBL_CFG1,
                        mmTPC0_QM_CP_STS,
                        mmTPC0_QM_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmTPC0_CMDQ_GLBL_CFG1,
                        mmTPC0_CMDQ_CP_STS,
                        mmTPC0_CMDQ_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmTPC1_QM_GLBL_CFG1,
                        mmTPC1_QM_CP_STS,
                        mmTPC1_QM_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmTPC1_CMDQ_GLBL_CFG1,
                        mmTPC1_CMDQ_CP_STS,
                        mmTPC1_CMDQ_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmTPC2_QM_GLBL_CFG1,
                        mmTPC2_QM_CP_STS,
                        mmTPC2_QM_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmTPC2_CMDQ_GLBL_CFG1,
                        mmTPC2_CMDQ_CP_STS,
                        mmTPC2_CMDQ_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmTPC3_QM_GLBL_CFG1,
                        mmTPC3_QM_CP_STS,
                        mmTPC3_QM_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmTPC3_CMDQ_GLBL_CFG1,
                        mmTPC3_CMDQ_CP_STS,
                        mmTPC3_CMDQ_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmTPC4_QM_GLBL_CFG1,
                        mmTPC4_QM_CP_STS,
                        mmTPC4_QM_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmTPC4_CMDQ_GLBL_CFG1,
                        mmTPC4_CMDQ_CP_STS,
                        mmTPC4_CMDQ_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmTPC5_QM_GLBL_CFG1,
                        mmTPC5_QM_CP_STS,
                        mmTPC5_QM_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmTPC5_CMDQ_GLBL_CFG1,
                        mmTPC5_CMDQ_CP_STS,
                        mmTPC5_CMDQ_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmTPC6_QM_GLBL_CFG1,
                        mmTPC6_QM_CP_STS,
                        mmTPC6_QM_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmTPC6_CMDQ_GLBL_CFG1,
                        mmTPC6_CMDQ_CP_STS,
                        mmTPC6_CMDQ_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmTPC7_QM_GLBL_CFG1,
                        mmTPC7_QM_CP_STS,
                        mmTPC7_QM_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmTPC7_CMDQ_GLBL_CFG1,
                        mmTPC7_CMDQ_CP_STS,
                        mmTPC7_CMDQ_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n");
                retval = -EIO;
        }

        return retval;
}

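/*
 * The three stall helpers below stop the engines themselves, as opposed to
 * stopping or disabling their queues. In the halt flow they are called
 * after the queues have been stopped and before the queues are disabled.
 */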
static void goya_dma_stall(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;

        if (!(goya->hw_cap_initialized & HW_CAP_DMA))
                return;

        WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
        WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
        WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
        WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
        WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
}

static void goya_tpc_stall(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;

        if (!(goya->hw_cap_initialized & HW_CAP_TPC))
                return;

        WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
        WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
        WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
        WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
        WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
        WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
        WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
        WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
}

static void goya_mme_stall(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;

        if (!(goya->hw_cap_initialized & HW_CAP_MME))
                return;

        WREG32(mmMME_STALL, 0xFFFFFFFF);
}

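/*
 * goya_enable_msix - Enable MSI-X and request all IRQs
 *
 * @hdev: pointer to hl_device structure
 *
 * Allocate exactly GOYA_MSIX_ENTRIES vectors and attach one IRQ per
 * completion queue, plus a dedicated vector for the event queue. On
 * failure, only the IRQs that were actually requested are freed.
 */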
static int goya_enable_msix(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        int cq_cnt = hdev->asic_prop.completion_queues_count;
        int rc, i, irq_cnt_init, irq;

        if (goya->hw_cap_initialized & HW_CAP_MSIX)
                return 0;

        rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
                                GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
        if (rc < 0) {
                dev_err(hdev->dev,
                        "MSI-X: Failed to enable support -- %d/%d\n",
                        GOYA_MSIX_ENTRIES, rc);
                return rc;
        }

        for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
                irq = pci_irq_vector(hdev->pdev, i);
                rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
                                &hdev->completion_queue[i]);
                if (rc) {
                        dev_err(hdev->dev, "Failed to request IRQ %d", irq);
                        goto free_irqs;
                }
        }

        irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);

        rc = request_irq(irq, hl_irq_handler_eq, 0,
                        goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
                        &hdev->event_queue);
        if (rc) {
                dev_err(hdev->dev, "Failed to request IRQ %d", irq);
                goto free_irqs;
        }

        goya->hw_cap_initialized |= HW_CAP_MSIX;
        return 0;

free_irqs:
        for (i = 0 ; i < irq_cnt_init ; i++)
                free_irq(pci_irq_vector(hdev->pdev, i),
                        &hdev->completion_queue[i]);

        pci_free_irq_vectors(hdev->pdev);
        return rc;
}

static void goya_sync_irqs(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        int i;

        if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
                return;

        /* Wait for all pending IRQ handlers to finish */
        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
                synchronize_irq(pci_irq_vector(hdev->pdev, i));

        synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX));
}

static void goya_disable_msix(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        int i, irq;

        if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
                return;

        goya_sync_irqs(hdev);

        irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
        free_irq(irq, &hdev->event_queue);

        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
                irq = pci_irq_vector(hdev->pdev, i);
                free_irq(irq, &hdev->completion_queue[i]);
        }

        pci_free_irq_vectors(hdev->pdev);

        goya->hw_cap_initialized &= ~HW_CAP_MSIX;
}

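/*
 * goya_enable_timestamp - Restart the device timestamp counter
 *
 * @hdev: pointer to hl_device structure
 *
 * The counter is disabled while its two 32-bit halves (at offsets 0x8 and
 * 0xC from the timestamp block base) are zeroed, and then re-enabled.
 */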
static void goya_enable_timestamp(struct hl_device *hdev)
{
        /* Disable the timestamp counter */
        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);

        /* Zero the lower/upper parts of the 64-bit counter */
        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);

        /* Enable the counter */
        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
}

static void goya_disable_timestamp(struct hl_device *hdev)
{
        /* Disable the timestamp counter */
        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
}

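/*
 * goya_halt_engines - Halt the compute engines
 *
 * @hdev: pointer to hl_device structure
 * @hard_reset: true if called as part of a hard reset
 * @fw_reset: true if the reset is handled by F/W (not used in this flow)
 *
 * The ordering matters: first stop the queues, then stall the engines, and
 * only then disable the queues. On hard reset also tear down MSI-X and the
 * device CPU mappings; otherwise just drain in-flight interrupts.
 */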
static void goya_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
        u32 wait_timeout_ms;

        dev_info(hdev->dev,
                "Halting compute engines and disabling interrupts\n");

        if (hdev->pldm)
                wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
        else
                wait_timeout_ms = GOYA_RESET_WAIT_MSEC;

        goya_stop_external_queues(hdev);
        goya_stop_internal_queues(hdev);

        msleep(wait_timeout_ms);

        goya_dma_stall(hdev);
        goya_tpc_stall(hdev);
        goya_mme_stall(hdev);

        msleep(wait_timeout_ms);

        goya_disable_external_queues(hdev);
        goya_disable_internal_queues(hdev);

        goya_disable_timestamp(hdev);

        if (hard_reset) {
                goya_disable_msix(hdev);
                goya_mmu_remove_device_cpu_mappings(hdev);
        } else {
                goya_sync_irqs(hdev);
        }
}

/*
 * goya_load_firmware_to_device() - Load LINUX FW code to device.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy LINUX fw code from firmware file to DDR BAR.
 *
 * Return: 0 on success, non-zero for failure.
 */
static int goya_load_firmware_to_device(struct hl_device *hdev)
{
        void __iomem *dst;

        dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;

        return hl_fw_load_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst, 0, 0);
}

/*
 * goya_load_boot_fit_to_device() - Load boot fit to device.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy boot fit file to SRAM BAR.
 *
 * Return: 0 on success, non-zero for failure.
 */
static int goya_load_boot_fit_to_device(struct hl_device *hdev)
{
        void __iomem *dst;

        dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + BOOT_FIT_SRAM_OFFSET;

        return hl_fw_load_fw_to_device(hdev, GOYA_BOOT_FIT_FILE, dst, 0, 0);
}

static void goya_init_dynamic_firmware_loader(struct hl_device *hdev)
{
        struct dynamic_fw_load_mgr *dynamic_loader;
        struct cpu_dyn_regs *dyn_regs;

        dynamic_loader = &hdev->fw_loader.dynamic_loader;

        /*
         * Here we update the initial values of a few specific dynamic regs
         * (before the first descriptor is read from the FW, those values
         * have to be hard-coded). In later stages of the protocol those
         * values will be updated automatically by reading the FW descriptor,
         * so the data there will always be up-to-date.
         */
        dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
        dyn_regs->kmd_msg_to_cpu =
                                cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
        dyn_regs->cpu_cmd_status_to_host =
                                cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);

        dynamic_loader->wait_for_bl_timeout = GOYA_WAIT_FOR_BL_TIMEOUT_USEC;
}

static void goya_init_static_firmware_loader(struct hl_device *hdev)
{
        struct static_fw_load_mgr *static_loader;

        static_loader = &hdev->fw_loader.static_loader;

        static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
        static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
        static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
        static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
        static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
        static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
        static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
        static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
        static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
        static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
        static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
        static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
}

static void goya_init_firmware_loader(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct fw_load_mgr *fw_loader = &hdev->fw_loader;

        /* fill common fields */
        fw_loader->fw_comp_loaded = FW_TYPE_NONE;
        fw_loader->boot_fit_img.image_name = GOYA_BOOT_FIT_FILE;
        fw_loader->linux_img.image_name = GOYA_LINUX_FW_FILE;
        fw_loader->cpu_timeout = GOYA_CPU_TIMEOUT_USEC;
        fw_loader->boot_fit_timeout = GOYA_BOOT_FIT_REQ_TIMEOUT_USEC;
        fw_loader->skip_bmc = false;
        fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
        fw_loader->dram_bar_id = DDR_BAR_ID;

        if (prop->dynamic_fw_load)
                goya_init_dynamic_firmware_loader(hdev);
        else
                goya_init_static_firmware_loader(hdev);
}

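/*
 * goya_init_cpu - Bring up the embedded CPU
 *
 * @hdev: pointer to hl_device structure
 *
 * Skipped entirely when the preboot F/W component is not in use. The DDR
 * bar must point at the DRAM base before the F/W images are pushed to the
 * device.
 */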
static int goya_init_cpu(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        int rc;

        if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
                return 0;

        if (goya->hw_cap_initialized & HW_CAP_CPU)
                return 0;

        /*
         * Before pushing u-boot/linux to the device, we need to set the DDR
         * bar to the base address of the DRAM
         */
        if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
                dev_err(hdev->dev,
                        "failed to map DDR bar to DRAM base address\n");
                return -EIO;
        }

        rc = hl_fw_init_cpu(hdev);

        if (rc)
                return rc;

        goya->hw_cap_initialized |= HW_CAP_CPU;

        return 0;
}

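/*
 * goya_mmu_update_asid_hop0_addr - Set the hop0 table address of an ASID
 *
 * @hdev: pointer to hl_device structure
 * @asid: ASID to configure
 * @phys_addr: physical address of the hop0 page table
 *
 * The address is split across two registers (bits 43:12 and 49:44) and the
 * update is kicked off by writing the ASID with the busy bit (bit 31) set.
 * We then poll until the H/W clears the busy bit.
 */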
static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
                                                u64 phys_addr)
{
        u32 status, timeout_usec;
        int rc;

        if (hdev->pldm)
                timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
        else
                timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

        WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
        WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
        WREG32(MMU_ASID_BUSY, 0x80000000 | asid);

        rc = hl_poll_timeout(
                hdev,
                MMU_ASID_BUSY,
                status,
                !(status & 0x80000000),
                1000,
                timeout_usec);

        if (rc) {
                dev_err(hdev->dev,
                        "Timeout during MMU hop0 config of asid %d\n", asid);
                return rc;
        }

        return 0;
}

int goya_mmu_init(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct goya_device *goya = hdev->asic_specific;
        u64 hop0_addr;
        int rc, i;

        if (!hdev->mmu_enable)
                return 0;

        if (goya->hw_cap_initialized & HW_CAP_MMU)
                return 0;

        hdev->dram_default_page_mapping = true;

        for (i = 0 ; i < prop->max_asid ; i++) {
                hop0_addr = prop->mmu_pgt_addr +
                                (i * prop->mmu_hop_table_size);

                rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
                if (rc) {
                        dev_err(hdev->dev,
                                "failed to set hop0 addr for asid %d\n", i);
                        goto err;
                }
        }

        goya->hw_cap_initialized |= HW_CAP_MMU;

        /* init MMU cache management page */
        WREG32(mmSTLB_CACHE_INV_BASE_39_8,
                                lower_32_bits(MMU_CACHE_MNG_ADDR >> 8));
        WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);

        /* Remove follower feature due to performance bug */
        WREG32_AND(mmSTLB_STLB_FEATURE_EN,
                        (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));

        hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
                                        MMU_OP_USERPTR | MMU_OP_PHYS_PACK);

        WREG32(mmMMU_MMU_ENABLE, 1);
        WREG32(mmMMU_SPI_MASK, 0xF);

        return 0;

err:
        return rc;
}

/*
 * goya_hw_init - Goya hardware initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_hw_init(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        int rc;

        /* Perform read from the device to make sure device is up */
        RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);

        /*
         * Let's mark in the H/W that we have reached this point. We check
         * this value in the reset_before_init function to understand whether
         * we need to reset the chip before doing H/W init. This register is
         * cleared by the H/W upon H/W reset
         */
        WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);

        rc = goya_init_cpu(hdev);
        if (rc) {
                dev_err(hdev->dev, "failed to initialize CPU\n");
                return rc;
        }

        goya_tpc_mbist_workaround(hdev);

        goya_init_golden_registers(hdev);

        /*
         * After CPU initialization is finished, change DDR bar mapping inside
         * iATU to point to the start address of the MMU page tables
         */
        if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
                        ~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
                dev_err(hdev->dev,
                        "failed to map DDR bar to MMU page tables\n");
                return -EIO;
        }

        rc = goya_mmu_init(hdev);
        if (rc)
                return rc;

        goya_init_security(hdev);

        goya_init_dma_qmans(hdev);

        goya_init_mme_qmans(hdev);

        goya_init_tpc_qmans(hdev);

        goya_enable_timestamp(hdev);

        /* MSI-X must be enabled before CPU queues are initialized */
        rc = goya_enable_msix(hdev);
        if (rc)
                goto disable_queues;

        /* Perform read from the device to flush all MSI-X configuration */
        RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);

        return 0;

disable_queues:
        goya_disable_internal_queues(hdev);
        goya_disable_external_queues(hdev);

        return rc;
}

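/*
 * goya_hw_fini - Goya hardware tear-down code
 *
 * @hdev: pointer to hl_device structure
 * @hard_reset: true to reset the entire chip, false to reset only the
 *              compute engines (DMA/MME/TPC)
 * @fw_reset: true if the reset is handled by F/W (not used in this flow)
 */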
static void goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
        struct goya_device *goya = hdev->asic_specific;
        u32 reset_timeout_ms, cpu_timeout_ms, status;

        if (hdev->pldm) {
                reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
                cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
        } else {
                reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
                cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
        }

        if (hard_reset) {
                /* We don't know the state of the CPU, so make sure it is
                 * stopped by any means necessary
                 */
                WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
                WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
                        GOYA_ASYNC_EVENT_ID_HALT_MACHINE);

                msleep(cpu_timeout_ms);

                goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
                goya_disable_clk_rlx(hdev);
                goya_set_pll_refclk(hdev);

                WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
                dev_info(hdev->dev,
                        "Issued HARD reset command, going to wait %dms\n",
                        reset_timeout_ms);
        } else {
                WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
                dev_info(hdev->dev,
                        "Issued SOFT reset command, going to wait %dms\n",
                        reset_timeout_ms);
        }

        /*
         * After hard reset, we can't poll the BTM_FSM register because the
         * PSOC itself is in reset. In either reset case, we need to wait
         * until the reset is deasserted
         */
        msleep(reset_timeout_ms);

        status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
        if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
                dev_err(hdev->dev,
                        "Timeout while waiting for device to reset 0x%x\n",
                        status);

        if (!hard_reset && goya) {
                goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
                                                HW_CAP_GOLDEN | HW_CAP_TPC);
                WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
                                GOYA_ASYNC_EVENT_ID_SOFT_RESET);
                return;
        }

        /* Chicken bit to re-initiate boot sequencer flow */
        WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START,
                1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT);
        /* Move boot manager FSM to pre boot sequencer init state */
        WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
                        0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);

        if (goya) {
                goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
                                HW_CAP_DDR_0 | HW_CAP_DDR_1 |
                                HW_CAP_DMA | HW_CAP_MME |
                                HW_CAP_MMU | HW_CAP_TPC_MBIST |
                                HW_CAP_GOLDEN | HW_CAP_TPC);

                memset(goya->events_stat, 0, sizeof(goya->events_stat));
        }
}

int goya_suspend(struct hl_device *hdev)
{
        int rc;

        rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
        if (rc)
                dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");

        return rc;
}

int goya_resume(struct hl_device *hdev)
{
        return goya_init_iatu(hdev);
}

2884 static int goya_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
2885                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
2886 {
2887         int rc;
2888
2889         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
2890                         VM_DONTCOPY | VM_NORESERVE;
2891
2892         rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
2893                                 (dma_addr - HOST_PHYS_BASE), size);
2894         if (rc)
2895                 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
2896
2897         return rc;
2898 }
2899
2900 void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
2901 {
2902         u32 db_reg_offset, db_value;
2903
2904         switch (hw_queue_id) {
2905         case GOYA_QUEUE_ID_DMA_0:
2906                 db_reg_offset = mmDMA_QM_0_PQ_PI;
2907                 break;
2908
2909         case GOYA_QUEUE_ID_DMA_1:
2910                 db_reg_offset = mmDMA_QM_1_PQ_PI;
2911                 break;
2912
2913         case GOYA_QUEUE_ID_DMA_2:
2914                 db_reg_offset = mmDMA_QM_2_PQ_PI;
2915                 break;
2916
2917         case GOYA_QUEUE_ID_DMA_3:
2918                 db_reg_offset = mmDMA_QM_3_PQ_PI;
2919                 break;
2920
2921         case GOYA_QUEUE_ID_DMA_4:
2922                 db_reg_offset = mmDMA_QM_4_PQ_PI;
2923                 break;
2924
2925         case GOYA_QUEUE_ID_CPU_PQ:
2926                 db_reg_offset = mmCPU_IF_PF_PQ_PI;
2927                 break;
2928
2929         case GOYA_QUEUE_ID_MME:
2930                 db_reg_offset = mmMME_QM_PQ_PI;
2931                 break;
2932
2933         case GOYA_QUEUE_ID_TPC0:
2934                 db_reg_offset = mmTPC0_QM_PQ_PI;
2935                 break;
2936
2937         case GOYA_QUEUE_ID_TPC1:
2938                 db_reg_offset = mmTPC1_QM_PQ_PI;
2939                 break;
2940
2941         case GOYA_QUEUE_ID_TPC2:
2942                 db_reg_offset = mmTPC2_QM_PQ_PI;
2943                 break;
2944
2945         case GOYA_QUEUE_ID_TPC3:
2946                 db_reg_offset = mmTPC3_QM_PQ_PI;
2947                 break;
2948
2949         case GOYA_QUEUE_ID_TPC4:
2950                 db_reg_offset = mmTPC4_QM_PQ_PI;
2951                 break;
2952
2953         case GOYA_QUEUE_ID_TPC5:
2954                 db_reg_offset = mmTPC5_QM_PQ_PI;
2955                 break;
2956
2957         case GOYA_QUEUE_ID_TPC6:
2958                 db_reg_offset = mmTPC6_QM_PQ_PI;
2959                 break;
2960
2961         case GOYA_QUEUE_ID_TPC7:
2962                 db_reg_offset = mmTPC7_QM_PQ_PI;
2963                 break;
2964
2965         default:
2966                 /* Should never get here */
2967                 dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
2968                         hw_queue_id);
2969                 return;
2970         }
2971
2972         db_value = pi;
2973
2974         /* ring the doorbell */
2975         WREG32(db_reg_offset, db_value);
2976
2977         if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ) {
2978                 /* make sure device CPU will read latest data from host */
2979                 mb();
2980                 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2981                                 GOYA_ASYNC_EVENT_ID_PI_UPDATE);
2982         }
2983 }
2984
2985 void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
2986 {
2987         /* The QMANs are on the SRAM, so we need to copy to IO space */
2988         memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
2989 }
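
/*
 * Illustrative usage sketch (assumed caller, not taken verbatim from the
 * driver): a submitter that has prepared a queue entry would typically
 * write it and then publish it with a single doorbell write, e.g.:
 *
 *	goya_pqe_write(hdev, pqe, bd);
 *	goya_ring_doorbell(hdev, GOYA_QUEUE_ID_DMA_0, new_pi);
 *
 * where pqe, bd and new_pi are assumed to be maintained by the generic
 * H/W queue code.
 */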
2990
2991 static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
2992                                         dma_addr_t *dma_handle, gfp_t flags)
2993 {
2994         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
2995                                                 dma_handle, flags);
2996
2997         /* Shift to the device's base physical address of host memory */
2998         if (kernel_addr)
2999                 *dma_handle += HOST_PHYS_BASE;
3000
3001         return kernel_addr;
3002 }
3003
3004 static void goya_dma_free_coherent(struct hl_device *hdev, size_t size,
3005                                         void *cpu_addr, dma_addr_t dma_handle)
3006 {
3007         /* Cancel the device's base physical address of host memory */
3008         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3009
3010         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3011 }
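
/*
 * Illustrative note on the HOST_PHYS_BASE shift (addresses assumed for
 * the example): if dma_alloc_coherent() returns a bus address of 0x1000,
 * the handle handed to the rest of the driver becomes
 * HOST_PHYS_BASE + 0x1000, i.e. the address at which the device sees
 * host memory. goya_dma_free_coherent() undoes the same shift, so the
 * two must always be used as a pair:
 *
 *	va = goya_dma_alloc_coherent(hdev, size, &dma_handle, GFP_KERNEL);
 *	...
 *	goya_dma_free_coherent(hdev, size, va, dma_handle);
 */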
3012
3013 int goya_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
3014 {
3015         return 0;
3016 }
3017
3018 void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
3019                                 dma_addr_t *dma_handle, u16 *queue_len)
3020 {
3021         void *base;
3022         u32 offset;
3023
3024         *dma_handle = hdev->asic_prop.sram_base_address;
3025
3026         base = (__force void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];
3027
3028         switch (queue_id) {
3029         case GOYA_QUEUE_ID_MME:
3030                 offset = MME_QMAN_BASE_OFFSET;
3031                 *queue_len = MME_QMAN_LENGTH;
3032                 break;
3033         case GOYA_QUEUE_ID_TPC0:
3034                 offset = TPC0_QMAN_BASE_OFFSET;
3035                 *queue_len = TPC_QMAN_LENGTH;
3036                 break;
3037         case GOYA_QUEUE_ID_TPC1:
3038                 offset = TPC1_QMAN_BASE_OFFSET;
3039                 *queue_len = TPC_QMAN_LENGTH;
3040                 break;
3041         case GOYA_QUEUE_ID_TPC2:
3042                 offset = TPC2_QMAN_BASE_OFFSET;
3043                 *queue_len = TPC_QMAN_LENGTH;
3044                 break;
3045         case GOYA_QUEUE_ID_TPC3:
3046                 offset = TPC3_QMAN_BASE_OFFSET;
3047                 *queue_len = TPC_QMAN_LENGTH;
3048                 break;
3049         case GOYA_QUEUE_ID_TPC4:
3050                 offset = TPC4_QMAN_BASE_OFFSET;
3051                 *queue_len = TPC_QMAN_LENGTH;
3052                 break;
3053         case GOYA_QUEUE_ID_TPC5:
3054                 offset = TPC5_QMAN_BASE_OFFSET;
3055                 *queue_len = TPC_QMAN_LENGTH;
3056                 break;
3057         case GOYA_QUEUE_ID_TPC6:
3058                 offset = TPC6_QMAN_BASE_OFFSET;
3059                 *queue_len = TPC_QMAN_LENGTH;
3060                 break;
3061         case GOYA_QUEUE_ID_TPC7:
3062                 offset = TPC7_QMAN_BASE_OFFSET;
3063                 *queue_len = TPC_QMAN_LENGTH;
3064                 break;
3065         default:
3066                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3067                 return NULL;
3068         }
3069
3070         base += offset;
3071         *dma_handle += offset;
3072
3073         return base;
3074 }
3075
3076 static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
3077 {
3078         struct packet_msg_prot *fence_pkt;
3079         u32 *fence_ptr;
3080         dma_addr_t fence_dma_addr;
3081         struct hl_cb *cb;
3082         u32 tmp, timeout;
3083         int rc;
3084
3085         if (hdev->pldm)
3086                 timeout = GOYA_PLDM_QMAN0_TIMEOUT_USEC;
3087         else
3088                 timeout = HL_DEVICE_TIMEOUT_USEC;
3089
3090         if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
3091                 dev_err_ratelimited(hdev->dev,
3092                         "Can't send driver job on QMAN0 because the device is not idle\n");
3093                 return -EBUSY;
3094         }
3095
3096         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3097                                                         &fence_dma_addr);
3098         if (!fence_ptr) {
3099                 dev_err(hdev->dev,
3100                         "Failed to allocate fence memory for QMAN0\n");
3101                 return -ENOMEM;
3102         }
3103
3104         goya_qman0_set_security(hdev, true);
3105
3106         cb = job->patched_cb;
3107
3108         fence_pkt = cb->kernel_address +
3109                         job->job_cb_size - sizeof(struct packet_msg_prot);
3110
3111         tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3112                         (1 << GOYA_PKT_CTL_EB_SHIFT) |
3113                         (1 << GOYA_PKT_CTL_MB_SHIFT);
3114         fence_pkt->ctl = cpu_to_le32(tmp);
3115         fence_pkt->value = cpu_to_le32(GOYA_QMAN0_FENCE_VAL);
3116         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3117
3118         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
3119                                         job->job_cb_size, cb->bus_address);
3120         if (rc) {
3121                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
3122                 goto free_fence_ptr;
3123         }
3124
3125         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
3126                                 (tmp == GOYA_QMAN0_FENCE_VAL), 1000,
3127                                 timeout, true);
3128
3129         hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);
3130
3131         if (rc == -ETIMEDOUT) {
3132                 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
3133                 goto free_fence_ptr;
3134         }
3135
3136 free_fence_ptr:
3137         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3138                                         fence_dma_addr);
3139
3140         goya_qman0_set_security(hdev, false);
3141
3142         return rc;
3143 }
3144
3145 int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
3146                                 u32 timeout, u64 *result)
3147 {
3148         struct goya_device *goya = hdev->asic_specific;
3149
3150         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
3151                 if (result)
3152                         *result = 0;
3153                 return 0;
3154         }
3155
3156         if (!timeout)
3157                 timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC;
3158
3159         return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
3160                                         timeout, result);
3161 }
3162
3163 int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3164 {
3165         struct packet_msg_prot *fence_pkt;
3166         dma_addr_t pkt_dma_addr;
3167         u32 fence_val, tmp;
3168         dma_addr_t fence_dma_addr;
3169         u32 *fence_ptr;
3170         int rc;
3171
3172         fence_val = GOYA_QMAN0_FENCE_VAL;
3173
3174         fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3175                                                         &fence_dma_addr);
3176         if (!fence_ptr) {
3177                 dev_err(hdev->dev,
3178                         "Failed to allocate memory for H/W queue %d testing\n",
3179                         hw_queue_id);
3180                 return -ENOMEM;
3181         }
3182
3183         *fence_ptr = 0;
3184
3185         fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3186                                         sizeof(struct packet_msg_prot),
3187                                         GFP_KERNEL, &pkt_dma_addr);
3188         if (!fence_pkt) {
3189                 dev_err(hdev->dev,
3190                         "Failed to allocate packet for H/W queue %d testing\n",
3191                         hw_queue_id);
3192                 rc = -ENOMEM;
3193                 goto free_fence_ptr;
3194         }
3195
3196         tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3197                         (1 << GOYA_PKT_CTL_EB_SHIFT) |
3198                         (1 << GOYA_PKT_CTL_MB_SHIFT);
3199         fence_pkt->ctl = cpu_to_le32(tmp);
3200         fence_pkt->value = cpu_to_le32(fence_val);
3201         fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3202
3203         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3204                                         sizeof(struct packet_msg_prot),
3205                                         pkt_dma_addr);
3206         if (rc) {
3207                 dev_err(hdev->dev,
3208                         "Failed to send fence packet to H/W queue %d\n",
3209                         hw_queue_id);
3210                 goto free_pkt;
3211         }
3212
3213         rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3214                                         1000, GOYA_TEST_QUEUE_WAIT_USEC, true);
3215
3216         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3217
3218         if (rc == -ETIMEDOUT) {
3219                 dev_err(hdev->dev,
3220                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3221                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3222                 rc = -EIO;
3223         }
3224
3225 free_pkt:
3226         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3227                                         pkt_dma_addr);
3228 free_fence_ptr:
3229         hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3230                                         fence_dma_addr);
3231         return rc;
3232 }
3233
3234 int goya_test_cpu_queue(struct hl_device *hdev)
3235 {
3236         struct goya_device *goya = hdev->asic_specific;
3237
3238         /*
3239          * Check the capability here because send_cpu_message() won't update
3240          * the result value if the capability isn't initialized
3241          */
3242         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
3243                 return 0;
3244
3245         return hl_fw_test_cpu_queue(hdev);
3246 }
3247
3248 int goya_test_queues(struct hl_device *hdev)
3249 {
3250         int i, rc, ret_val = 0;
3251
3252         for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
3253                 rc = goya_test_queue(hdev, i);
3254                 if (rc)
3255                         ret_val = -EINVAL;
3256         }
3257
3258         return ret_val;
3259 }
3260
3261 static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3262                                         gfp_t mem_flags, dma_addr_t *dma_handle)
3263 {
3264         void *kernel_addr;
3265
3266         if (size > GOYA_DMA_POOL_BLK_SIZE)
3267                 return NULL;
3268
3269         kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3270
3271         /* Shift to the device's base physical address of host memory */
3272         if (kernel_addr)
3273                 *dma_handle += HOST_PHYS_BASE;
3274
3275         return kernel_addr;
3276 }
3277
3278 static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
3279                                 dma_addr_t dma_addr)
3280 {
3281         /* Cancel the device's base physical address of host memory */
3282         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3283
3284         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3285 }
3286
3287 void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
3288                                         dma_addr_t *dma_handle)
3289 {
3290         void *vaddr;
3291
3292         vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3293         *dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
3294                         VA_CPU_ACCESSIBLE_MEM_ADDR;
3295
3296         return vaddr;
3297 }
3298
3299 void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
3300                                         void *vaddr)
3301 {
3302         hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3303 }
3304
3305 static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3306                                 int nents, enum dma_data_direction dir)
3307 {
3308         struct scatterlist *sg;
3309         int i;
3310
3311         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3312                 return -ENOMEM;
3313
3314         /* Shift to the device's base physical address of host memory */
3315         for_each_sg(sgl, sg, nents, i)
3316                 sg->dma_address += HOST_PHYS_BASE;
3317
3318         return 0;
3319 }
3320
3321 static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3322                                 int nents, enum dma_data_direction dir)
3323 {
3324         struct scatterlist *sg;
3325         int i;
3326
3327         /* Cancel the device's base physical address of host memory */
3328         for_each_sg(sgl, sg, nents, i)
3329                 sg->dma_address -= HOST_PHYS_BASE;
3330
3331         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3332 }
3333
3334 u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
3335 {
3336         struct scatterlist *sg, *sg_next_iter;
3337         u32 count, dma_desc_cnt;
3338         u64 len, len_next;
3339         dma_addr_t addr, addr_next;
3340
3341         dma_desc_cnt = 0;
3342
3343         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3344
3345                 len = sg_dma_len(sg);
3346                 addr = sg_dma_address(sg);
3347
3348                 if (len == 0)
3349                         break;
3350
3351                 while ((count + 1) < sgt->nents) {
3352                         sg_next_iter = sg_next(sg);
3353                         len_next = sg_dma_len(sg_next_iter);
3354                         addr_next = sg_dma_address(sg_next_iter);
3355
3356                         if (len_next == 0)
3357                                 break;
3358
3359                         if ((addr + len == addr_next) &&
3360                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3361                                 len += len_next;
3362                                 count++;
3363                                 sg = sg_next_iter;
3364                         } else {
3365                                 break;
3366                         }
3367                 }
3368
3369                 dma_desc_cnt++;
3370         }
3371
3372         return dma_desc_cnt * sizeof(struct packet_lin_dma);
3373 }
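
/*
 * Worked example (sizes assumed): three SG entries of 0x1000 bytes at
 * bus addresses 0x0, 0x1000 and 0x3000. The first two are contiguous
 * (0x0 + 0x1000 == 0x1000) and their combined length is below
 * DMA_MAX_TRANSFER_SIZE, so they merge into a single descriptor; the
 * third is not contiguous and needs its own. The function would return
 * 2 * sizeof(struct packet_lin_dma).
 */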
3374
3375 static int goya_pin_memory_before_cs(struct hl_device *hdev,
3376                                 struct hl_cs_parser *parser,
3377                                 struct packet_lin_dma *user_dma_pkt,
3378                                 u64 addr, enum dma_data_direction dir)
3379 {
3380         struct hl_userptr *userptr;
3381         int rc;
3382
3383         if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3384                         parser->job_userptr_list, &userptr))
3385                 goto already_pinned;
3386
3387         userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
3388         if (!userptr)
3389                 return -ENOMEM;
3390
3391         rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3392                                 userptr);
3393         if (rc)
3394                 goto free_userptr;
3395
3396         list_add_tail(&userptr->job_node, parser->job_userptr_list);
3397
3398         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3399                                         userptr->sgt->nents, dir);
3400         if (rc) {
3401                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3402                 goto unpin_memory;
3403         }
3404
3405         userptr->dma_mapped = true;
3406         userptr->dir = dir;
3407
3408 already_pinned:
3409         parser->patched_cb_size +=
3410                         goya_get_dma_desc_list_size(hdev, userptr->sgt);
3411
3412         return 0;
3413
3414 unpin_memory:
3415         list_del(&userptr->job_node);
3416         hl_unpin_host_memory(hdev, userptr);
3417 free_userptr:
3418         kfree(userptr);
3419         return rc;
3420 }
3421
3422 static int goya_validate_dma_pkt_host(struct hl_device *hdev,
3423                                 struct hl_cs_parser *parser,
3424                                 struct packet_lin_dma *user_dma_pkt)
3425 {
3426         u64 device_memory_addr, addr;
3427         enum dma_data_direction dir;
3428         enum goya_dma_direction user_dir;
3429         bool sram_addr = true;
3430         bool skip_host_mem_pin = false;
3431         bool user_memset;
3432         u32 ctl;
3433         int rc = 0;
3434
3435         ctl = le32_to_cpu(user_dma_pkt->ctl);
3436
3437         user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3438                         GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3439
3440         user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3441                         GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3442
3443         switch (user_dir) {
3444         case DMA_HOST_TO_DRAM:
3445                 dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
3446                 dir = DMA_TO_DEVICE;
3447                 sram_addr = false;
3448                 addr = le64_to_cpu(user_dma_pkt->src_addr);
3449                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3450                 if (user_memset)
3451                         skip_host_mem_pin = true;
3452                 break;
3453
3454         case DMA_DRAM_TO_HOST:
3455                 dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
3456                 dir = DMA_FROM_DEVICE;
3457                 sram_addr = false;
3458                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3459                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3460                 break;
3461
3462         case DMA_HOST_TO_SRAM:
3463                 dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
3464                 dir = DMA_TO_DEVICE;
3465                 addr = le64_to_cpu(user_dma_pkt->src_addr);
3466                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3467                 if (user_memset)
3468                         skip_host_mem_pin = true;
3469                 break;
3470
3471         case DMA_SRAM_TO_HOST:
3472                 dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
3473                 dir = DMA_FROM_DEVICE;
3474                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3475                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3476                 break;
3477         default:
3478                 dev_err(hdev->dev, "DMA direction is undefined\n");
3479                 return -EFAULT;
3480         }
3481
3482         if (sram_addr) {
3483                 if (!hl_mem_area_inside_range(device_memory_addr,
3484                                 le32_to_cpu(user_dma_pkt->tsize),
3485                                 hdev->asic_prop.sram_user_base_address,
3486                                 hdev->asic_prop.sram_end_address)) {
3487
3488                         dev_err(hdev->dev,
3489                                 "SRAM address 0x%llx + 0x%x is invalid\n",
3490                                 device_memory_addr,
3491                                 le32_to_cpu(user_dma_pkt->tsize));
3492                         return -EFAULT;
3493                 }
3494         } else {
3495                 if (!hl_mem_area_inside_range(device_memory_addr,
3496                                 le32_to_cpu(user_dma_pkt->tsize),
3497                                 hdev->asic_prop.dram_user_base_address,
3498                                 hdev->asic_prop.dram_end_address)) {
3499
3500                         dev_err(hdev->dev,
3501                                 "DRAM address 0x%llx + 0x%x is invalid\n",
3502                                 device_memory_addr,
3503                                 le32_to_cpu(user_dma_pkt->tsize));
3504                         return -EFAULT;
3505                 }
3506         }
3507
3508         if (skip_host_mem_pin)
3509                 parser->patched_cb_size += sizeof(*user_dma_pkt);
3510         else {
3511                 if ((dir == DMA_TO_DEVICE) &&
3512                                 (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
3513                         dev_err(hdev->dev,
3514                                 "Can't DMA from host on queue other than 1\n");
3515                         return -EFAULT;
3516                 }
3517
3518                 rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3519                                                 addr, dir);
3520         }
3521
3522         return rc;
3523 }
3524
3525 static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
3526                                 struct hl_cs_parser *parser,
3527                                 struct packet_lin_dma *user_dma_pkt)
3528 {
3529         u64 sram_memory_addr, dram_memory_addr;
3530         enum goya_dma_direction user_dir;
3531         u32 ctl;
3532
3533         ctl = le32_to_cpu(user_dma_pkt->ctl);
3534         user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3535                         GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3536
3537         if (user_dir == DMA_DRAM_TO_SRAM) {
3538                 dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
3539                 dram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3540                 sram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3541         } else {
3542                 dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
3543                 sram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3544                 dram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3545         }
3546
3547         if (!hl_mem_area_inside_range(sram_memory_addr,
3548                                 le32_to_cpu(user_dma_pkt->tsize),
3549                                 hdev->asic_prop.sram_user_base_address,
3550                                 hdev->asic_prop.sram_end_address)) {
3551                 dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
3552                         sram_memory_addr, le32_to_cpu(user_dma_pkt->tsize));
3553                 return -EFAULT;
3554         }
3555
3556         if (!hl_mem_area_inside_range(dram_memory_addr,
3557                                 le32_to_cpu(user_dma_pkt->tsize),
3558                                 hdev->asic_prop.dram_user_base_address,
3559                                 hdev->asic_prop.dram_end_address)) {
3560                 dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
3561                         dram_memory_addr, le32_to_cpu(user_dma_pkt->tsize));
3562                 return -EFAULT;
3563         }
3564
3565         parser->patched_cb_size += sizeof(*user_dma_pkt);
3566
3567         return 0;
3568 }
3569
3570 static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3571                                 struct hl_cs_parser *parser,
3572                                 struct packet_lin_dma *user_dma_pkt)
3573 {
3574         enum goya_dma_direction user_dir;
3575         u32 ctl;
3576         int rc;
3577
3578         dev_dbg(hdev->dev, "DMA packet details:\n");
3579         dev_dbg(hdev->dev, "source == 0x%llx\n",
3580                 le64_to_cpu(user_dma_pkt->src_addr));
3581         dev_dbg(hdev->dev, "destination == 0x%llx\n",
3582                 le64_to_cpu(user_dma_pkt->dst_addr));
3583         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3584
3585         ctl = le32_to_cpu(user_dma_pkt->ctl);
3586         user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3587                         GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3588
3589         /*
3590          * Special handling for DMA with size 0. The H/W has a bug where
3591          * this can cause the QMAN DMA to get stuck, so block it here.
3592          */
3593         if (user_dma_pkt->tsize == 0) {
3594                 dev_err(hdev->dev,
3595                         "Got DMA with size 0, might reset the device\n");
3596                 return -EINVAL;
3597         }
3598
3599         if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM))
3600                 rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
3601         else
3602                 rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);
3603
3604         return rc;
3605 }
3606
3607 static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
3608                                 struct hl_cs_parser *parser,
3609                                 struct packet_lin_dma *user_dma_pkt)
3610 {
3611         dev_dbg(hdev->dev, "DMA packet details:\n");
3612         dev_dbg(hdev->dev, "source == 0x%llx\n",
3613                 le64_to_cpu(user_dma_pkt->src_addr));
3614         dev_dbg(hdev->dev, "destination == 0x%llx\n",
3615                 le64_to_cpu(user_dma_pkt->dst_addr));
3616         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3617
3618         /*
3619          * WA for HW-23.
3620          * We can't allow user to read from Host using QMANs other than 1.
3621          * PMMU and HPMMU addresses are equal, check only one of them.
3622          */
3623         if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
3624                 hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
3625                                 le32_to_cpu(user_dma_pkt->tsize),
3626                                 hdev->asic_prop.pmmu.start_addr,
3627                                 hdev->asic_prop.pmmu.end_addr)) {
3628                 dev_err(hdev->dev,
3629                         "Can't DMA from host on queue other than 1\n");
3630                 return -EFAULT;
3631         }
3632
3633         if (user_dma_pkt->tsize == 0) {
3634                 dev_err(hdev->dev,
3635                         "Got DMA with size 0, might reset the device\n");
3636                 return -EINVAL;
3637         }
3638
3639         parser->patched_cb_size += sizeof(*user_dma_pkt);
3640
3641         return 0;
3642 }
3643
3644 static int goya_validate_wreg32(struct hl_device *hdev,
3645                                 struct hl_cs_parser *parser,
3646                                 struct packet_wreg32 *wreg_pkt)
3647 {
3648         struct goya_device *goya = hdev->asic_specific;
3649         u32 sob_start_addr, sob_end_addr;
3650         u16 reg_offset;
3651
3652         reg_offset = le32_to_cpu(wreg_pkt->ctl) &
3653                         GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;
3654
3655         dev_dbg(hdev->dev, "WREG32 packet details:\n");
3656         dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
3657         dev_dbg(hdev->dev, "value      == 0x%x\n",
3658                 le32_to_cpu(wreg_pkt->value));
3659
3660         if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) {
3661                 dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
3662                         reg_offset);
3663                 return -EPERM;
3664         }
3665
3666         /*
3667          * With MMU, DMA channels are not secured, so it doesn't matter where
3668          * the WR COMP is written, because the transaction goes out with a
3669          * non-secured property
3670          */
3671         if (goya->hw_cap_initialized & HW_CAP_MMU)
3672                 return 0;
3673
3674         sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
3675         sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);
3676
3677         if ((le32_to_cpu(wreg_pkt->value) < sob_start_addr) ||
3678                         (le32_to_cpu(wreg_pkt->value) > sob_end_addr)) {
3679
3680                 dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
3681                         le32_to_cpu(wreg_pkt->value));
3682                 return -EPERM;
3683         }
3684
3685         return 0;
3686 }
3687
3688 static int goya_validate_cb(struct hl_device *hdev,
3689                         struct hl_cs_parser *parser, bool is_mmu)
3690 {
3691         u32 cb_parsed_length = 0;
3692         int rc = 0;
3693
3694         parser->patched_cb_size = 0;
3695
3696         /* user_cb_size is more than 0 so the loop will always be executed */
3697         while (cb_parsed_length < parser->user_cb_size) {
3698                 enum packet_id pkt_id;
3699                 u16 pkt_size;
3700                 struct goya_packet *user_pkt;
3701
3702                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
3703
3704                 pkt_id = (enum packet_id) (
3705                                 (le64_to_cpu(user_pkt->header) &
3706                                 PACKET_HEADER_PACKET_ID_MASK) >>
3707                                         PACKET_HEADER_PACKET_ID_SHIFT);
3708
3709                 if (!validate_packet_id(pkt_id)) {
3710                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3711                         rc = -EINVAL;
3712                         break;
3713                 }
3714
3715                 pkt_size = goya_packet_sizes[pkt_id];
3716                 cb_parsed_length += pkt_size;
3717                 if (cb_parsed_length > parser->user_cb_size) {
3718                         dev_err(hdev->dev,
3719                                 "packet 0x%x is out of CB boundary\n", pkt_id);
3720                         rc = -EINVAL;
3721                         break;
3722                 }
3723
3724                 switch (pkt_id) {
3725                 case PACKET_WREG_32:
3726                         /*
3727                          * Although it is validated after copy in patch_cb(), we
3728                          * need to validate here as well because patch_cb() is not
3729                          * called in the MMU path, while this function is
3730                          */
3731                         rc = goya_validate_wreg32(hdev,
3732                                 parser, (struct packet_wreg32 *) user_pkt);
3733                         parser->patched_cb_size += pkt_size;
3734                         break;
3735
3736                 case PACKET_WREG_BULK:
3737                         dev_err(hdev->dev,
3738                                 "User not allowed to use WREG_BULK\n");
3739                         rc = -EPERM;
3740                         break;
3741
3742                 case PACKET_MSG_PROT:
3743                         dev_err(hdev->dev,
3744                                 "User not allowed to use MSG_PROT\n");
3745                         rc = -EPERM;
3746                         break;
3747
3748                 case PACKET_CP_DMA:
3749                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3750                         rc = -EPERM;
3751                         break;
3752
3753                 case PACKET_STOP:
3754                         dev_err(hdev->dev, "User not allowed to use STOP\n");
3755                         rc = -EPERM;
3756                         break;
3757
3758                 case PACKET_LIN_DMA:
3759                         if (is_mmu)
3760                                 rc = goya_validate_dma_pkt_mmu(hdev, parser,
3761                                         (struct packet_lin_dma *) user_pkt);
3762                         else
3763                                 rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
3764                                         (struct packet_lin_dma *) user_pkt);
3765                         break;
3766
3767                 case PACKET_MSG_LONG:
3768                 case PACKET_MSG_SHORT:
3769                 case PACKET_FENCE:
3770                 case PACKET_NOP:
3771                         parser->patched_cb_size += pkt_size;
3772                         break;
3773
3774                 default:
3775                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3776                                 pkt_id);
3777                         rc = -EINVAL;
3778                         break;
3779                 }
3780
3781                 if (rc)
3782                         break;
3783         }
3784
3785         /*
3786          * The new CB should have space at the end for two MSG_PROT packets:
3787          * 1. A packet that will act as a completion packet
3788          * 2. A packet that will generate MSI-X interrupt
3789          */
3790         parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3791
3792         return rc;
3793 }
3794
3795 static int goya_patch_dma_packet(struct hl_device *hdev,
3796                                 struct hl_cs_parser *parser,
3797                                 struct packet_lin_dma *user_dma_pkt,
3798                                 struct packet_lin_dma *new_dma_pkt,
3799                                 u32 *new_dma_pkt_size)
3800 {
3801         struct hl_userptr *userptr;
3802         struct scatterlist *sg, *sg_next_iter;
3803         u32 count, dma_desc_cnt;
3804         u64 len, len_next;
3805         dma_addr_t dma_addr, dma_addr_next;
3806         enum goya_dma_direction user_dir;
3807         u64 device_memory_addr, addr;
3808         enum dma_data_direction dir;
3809         struct sg_table *sgt;
3810         bool skip_host_mem_pin = false;
3811         bool user_memset;
3812         u32 user_rdcomp_mask, user_wrcomp_mask, ctl;
3813
3814         ctl = le32_to_cpu(user_dma_pkt->ctl);
3815
3816         user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3817                         GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3818
3819         user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3820                         GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3821
3822         if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM) ||
3823                         (user_dma_pkt->tsize == 0)) {
3824                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
3825                 *new_dma_pkt_size = sizeof(*new_dma_pkt);
3826                 return 0;
3827         }
3828
3829         if ((user_dir == DMA_HOST_TO_DRAM) || (user_dir == DMA_HOST_TO_SRAM)) {
3830                 addr = le64_to_cpu(user_dma_pkt->src_addr);
3831                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3832                 dir = DMA_TO_DEVICE;
3833                 if (user_memset)
3834                         skip_host_mem_pin = true;
3835         } else {
3836                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3837                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3838                 dir = DMA_FROM_DEVICE;
3839         }
3840
3841         if ((!skip_host_mem_pin) &&
3842                 (hl_userptr_is_pinned(hdev, addr,
3843                         le32_to_cpu(user_dma_pkt->tsize),
3844                         parser->job_userptr_list, &userptr) == false)) {
3845                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3846                                 addr, le32_to_cpu(user_dma_pkt->tsize));
3847                 return -EFAULT;
3848         }
3849
3850         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3851                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3852                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
3853                 return 0;
3854         }
3855
3856         user_rdcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK;
3857
3858         user_wrcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK;
3859
3860         sgt = userptr->sgt;
3861         dma_desc_cnt = 0;
3862
3863         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3864                 len = sg_dma_len(sg);
3865                 dma_addr = sg_dma_address(sg);
3866
3867                 if (len == 0)
3868                         break;
3869
3870                 while ((count + 1) < sgt->nents) {
3871                         sg_next_iter = sg_next(sg);
3872                         len_next = sg_dma_len(sg_next_iter);
3873                         dma_addr_next = sg_dma_address(sg_next_iter);
3874
3875                         if (len_next == 0)
3876                                 break;
3877
3878                         if ((dma_addr + len == dma_addr_next) &&
3879                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3880                                 len += len_next;
3881                                 count++;
3882                                 sg = sg_next_iter;
3883                         } else {
3884                                 break;
3885                         }
3886                 }
3887
3888                 ctl = le32_to_cpu(user_dma_pkt->ctl);
3889                 if (likely(dma_desc_cnt))
3890                         ctl &= ~GOYA_PKT_CTL_EB_MASK;
3891                 ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
3892                                 GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
3893                 new_dma_pkt->ctl = cpu_to_le32(ctl);
3894                 new_dma_pkt->tsize = cpu_to_le32((u32) len);
3895
3896                 if (dir == DMA_TO_DEVICE) {
3897                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3898                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3899                 } else {
3900                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3901                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3902                 }
3903
3904                 if (!user_memset)
3905                         device_memory_addr += len;
3906                 dma_desc_cnt++;
3907                 new_dma_pkt++;
3908         }
3909
3910         if (!dma_desc_cnt) {
3911                 dev_err(hdev->dev,
3912                         "Got 0 SG entries when patching DMA packet\n");
3913                 return -EFAULT;
3914         }
3915
3916         /* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
3917         new_dma_pkt--;
3918         new_dma_pkt->ctl |= cpu_to_le32(user_rdcomp_mask | user_wrcomp_mask);
3919
3920         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
3921
3922         return 0;
3923 }
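
/*
 * Summary of the patching above, with an assumed example: a single user
 * LIN_DMA packet whose pinned host buffer maps to N coalesced SG runs is
 * expanded into N LIN_DMA packets, one per run. EB is kept only on the
 * first packet, rdcomp/wrcomp are cleared on all of them and restored on
 * the last one, so completion behaves as the user originally requested.
 */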
3924
3925 static int goya_patch_cb(struct hl_device *hdev,
3926                                 struct hl_cs_parser *parser)
3927 {
3928         u32 cb_parsed_length = 0;
3929         u32 cb_patched_cur_length = 0;
3930         int rc = 0;
3931
3932         /* user_cb_size is more than 0 so the loop will always be executed */
3933         while (cb_parsed_length < parser->user_cb_size) {
3934                 enum packet_id pkt_id;
3935                 u16 pkt_size;
3936                 u32 new_pkt_size = 0;
3937                 struct goya_packet *user_pkt, *kernel_pkt;
3938
3939                 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
3940                 kernel_pkt = parser->patched_cb->kernel_address +
3941                                         cb_patched_cur_length;
3942
3943                 pkt_id = (enum packet_id) (
3944                                 (le64_to_cpu(user_pkt->header) &
3945                                 PACKET_HEADER_PACKET_ID_MASK) >>
3946                                         PACKET_HEADER_PACKET_ID_SHIFT);
3947
3948                 if (!validate_packet_id(pkt_id)) {
3949                         dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
3950                         rc = -EINVAL;
3951                         break;
3952                 }
3953
3954                 pkt_size = goya_packet_sizes[pkt_id];
3955                 cb_parsed_length += pkt_size;
3956                 if (cb_parsed_length > parser->user_cb_size) {
3957                         dev_err(hdev->dev,
3958                                 "packet 0x%x is out of CB boundary\n", pkt_id);
3959                         rc = -EINVAL;
3960                         break;
3961                 }
3962
3963                 switch (pkt_id) {
3964                 case PACKET_LIN_DMA:
3965                         rc = goya_patch_dma_packet(hdev, parser,
3966                                         (struct packet_lin_dma *) user_pkt,
3967                                         (struct packet_lin_dma *) kernel_pkt,
3968                                         &new_pkt_size);
3969                         cb_patched_cur_length += new_pkt_size;
3970                         break;
3971
3972                 case PACKET_WREG_32:
3973                         memcpy(kernel_pkt, user_pkt, pkt_size);
3974                         cb_patched_cur_length += pkt_size;
3975                         rc = goya_validate_wreg32(hdev, parser,
3976                                         (struct packet_wreg32 *) kernel_pkt);
3977                         break;
3978
3979                 case PACKET_WREG_BULK:
3980                         dev_err(hdev->dev,
3981                                 "User not allowed to use WREG_BULK\n");
3982                         rc = -EPERM;
3983                         break;
3984
3985                 case PACKET_MSG_PROT:
3986                         dev_err(hdev->dev,
3987                                 "User not allowed to use MSG_PROT\n");
3988                         rc = -EPERM;
3989                         break;
3990
3991                 case PACKET_CP_DMA:
3992                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3993                         rc = -EPERM;
3994                         break;
3995
3996                 case PACKET_STOP:
3997                         dev_err(hdev->dev, "User not allowed to use STOP\n");
3998                         rc = -EPERM;
3999                         break;
4000
4001                 case PACKET_MSG_LONG:
4002                 case PACKET_MSG_SHORT:
4003                 case PACKET_FENCE:
4004                 case PACKET_NOP:
4005                         memcpy(kernel_pkt, user_pkt, pkt_size);
4006                         cb_patched_cur_length += pkt_size;
4007                         break;
4008
4009                 default:
4010                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4011                                 pkt_id);
4012                         rc = -EINVAL;
4013                         break;
4014                 }
4015
4016                 if (rc)
4017                         break;
4018         }
4019
4020         return rc;
4021 }
4022
4023 static int goya_parse_cb_mmu(struct hl_device *hdev,
4024                 struct hl_cs_parser *parser)
4025 {
4026         u64 patched_cb_handle;
4027         u32 patched_cb_size;
4028         struct hl_cb *user_cb;
4029         int rc;
4030
4031         /*
4032          * The new CB should have space at the end for two MSG_PROT packets:
4033          * 1. A packet that will act as a completion packet
4034          * 2. A packet that will generate MSI-X interrupt
4035          */
4036         parser->patched_cb_size = parser->user_cb_size +
4037                         sizeof(struct packet_msg_prot) * 2;
4038
4039         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4040                                 parser->patched_cb_size, false, false,
4041                                 &patched_cb_handle);
4042
4043         if (rc) {
4044                 dev_err(hdev->dev,
4045                         "Failed to allocate patched CB for DMA CS %d\n",
4046                         rc);
4047                 return rc;
4048         }
4049
4050         patched_cb_handle >>= PAGE_SHIFT;
4051         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4052                                 (u32) patched_cb_handle);
4053         /* hl_cb_get should never fail here */
4054         if (!parser->patched_cb) {
4055                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
4056                         (u32) patched_cb_handle);
4057                 rc = -EFAULT;
4058                 goto out;
4059         }
4060
4061         /*
4062          * The check that parser->user_cb_size <= parser->user_cb->size was done
4063          * in validate_queue_index().
4064          */
4065         memcpy(parser->patched_cb->kernel_address,
4066                 parser->user_cb->kernel_address,
4067                 parser->user_cb_size);
4068
4069         patched_cb_size = parser->patched_cb_size;
4070
4071         /* validate patched CB instead of user CB */
4072         user_cb = parser->user_cb;
4073         parser->user_cb = parser->patched_cb;
4074         rc = goya_validate_cb(hdev, parser, true);
4075         parser->user_cb = user_cb;
4076
4077         if (rc) {
4078                 hl_cb_put(parser->patched_cb);
4079                 goto out;
4080         }
4081
4082         if (patched_cb_size != parser->patched_cb_size) {
4083                 dev_err(hdev->dev, "user CB size mismatch\n");
4084                 hl_cb_put(parser->patched_cb);
4085                 rc = -EINVAL;
4086                 goto out;
4087         }
4088
4089 out:
4090         /*
4091          * Always call cb destroy here because we still hold 1 reference
4092          * to it from the earlier cb_get. After the job is completed,
4093          * cb_put will release it, but here we want to remove it from the
4094          * idr
4095          */
4096         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4097                                         patched_cb_handle << PAGE_SHIFT);
4098
4099         return rc;
4100 }
4101
4102 static int goya_parse_cb_no_mmu(struct hl_device *hdev,
4103                                 struct hl_cs_parser *parser)
4104 {
4105         u64 patched_cb_handle;
4106         int rc;
4107
4108         rc = goya_validate_cb(hdev, parser, false);
4109
4110         if (rc)
4111                 goto free_userptr;
4112
4113         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
4114                                 parser->patched_cb_size, false, false,
4115                                 &patched_cb_handle);
4116         if (rc) {
4117                 dev_err(hdev->dev,
4118                         "Failed to allocate patched CB for DMA CS %d\n", rc);
4119                 goto free_userptr;
4120         }
4121
4122         patched_cb_handle >>= PAGE_SHIFT;
4123         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4124                                 (u32) patched_cb_handle);
4125         /* hl_cb_get should never fail here */
4126         if (!parser->patched_cb) {
4127                 dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
4128                         (u32) patched_cb_handle);
4129                 rc = -EFAULT;
4130                 goto out;
4131         }
4132
4133         rc = goya_patch_cb(hdev, parser);
4134
4135         if (rc)
4136                 hl_cb_put(parser->patched_cb);
4137
4138 out:
4139         /*
4140          * Always call cb destroy here because we still hold 1 reference
4141          * to it from the earlier cb_get. After the job is completed,
4142          * cb_put will release it, but here we want to remove it from the
4143          * idr
4144          */
4145         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4146                                 patched_cb_handle << PAGE_SHIFT);
4147
4148 free_userptr:
4149         if (rc)
4150                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
4151         return rc;
4152 }
4153
4154 static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
4155                                         struct hl_cs_parser *parser)
4156 {
4157         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4158         struct goya_device *goya = hdev->asic_specific;
4159
4160         if (goya->hw_cap_initialized & HW_CAP_MMU)
4161                 return 0;
4162
4163         /* For internal queue jobs, just check if CB address is valid */
4164         if (hl_mem_area_inside_range(
4165                         (u64) (uintptr_t) parser->user_cb,
4166                         parser->user_cb_size,
4167                         asic_prop->sram_user_base_address,
4168                         asic_prop->sram_end_address))
4169                 return 0;
4170
4171         if (hl_mem_area_inside_range(
4172                         (u64) (uintptr_t) parser->user_cb,
4173                         parser->user_cb_size,
4174                         asic_prop->dram_user_base_address,
4175                         asic_prop->dram_end_address))
4176                 return 0;
4177
4178         dev_err(hdev->dev,
4179                 "Internal CB address 0x%px + 0x%x is neither in SRAM nor in DRAM\n",
4180                 parser->user_cb, parser->user_cb_size);
4181
4182         return -EFAULT;
4183 }
4184
4185 int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4186 {
4187         struct goya_device *goya = hdev->asic_specific;
4188
4189         if (parser->queue_type == QUEUE_TYPE_INT)
4190                 return goya_parse_cb_no_ext_queue(hdev, parser);
4191
4192         if (goya->hw_cap_initialized & HW_CAP_MMU)
4193                 return goya_parse_cb_mmu(hdev, parser);
4194         else
4195                 return goya_parse_cb_no_mmu(hdev, parser);
4196 }
4197
4198 void goya_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
4199                                 u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec,
4200                                 bool eb)
4201 {
4202         struct packet_msg_prot *cq_pkt;
4203         u32 tmp;
4204
4205         cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
4206
4207         tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
4208                         (1 << GOYA_PKT_CTL_EB_SHIFT) |
4209                         (1 << GOYA_PKT_CTL_MB_SHIFT);
4210         cq_pkt->ctl = cpu_to_le32(tmp);
4211         cq_pkt->value = cpu_to_le32(cq_val);
4212         cq_pkt->addr = cpu_to_le64(cq_addr);
4213
4214         cq_pkt++;
4215
4216         tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
4217                         (1 << GOYA_PKT_CTL_MB_SHIFT);
4218         cq_pkt->ctl = cpu_to_le32(tmp);
4219         cq_pkt->value = cpu_to_le32(msix_vec & 0x7FF);
4220         cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF);
4221 }
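
/*
 * Illustrative note: the two MSG_PROT packets written here occupy
 * exactly the sizeof(struct packet_msg_prot) * 2 bytes that the parser
 * reserved at the end of every patched CB (see goya_validate_cb() and
 * goya_parse_cb_mmu()), so len is expected to be the patched CB size.
 */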
4222
4223 void goya_update_eq_ci(struct hl_device *hdev, u32 val)
4224 {
4225         WREG32(mmCPU_EQ_CI, val);
4226 }
4227
4228 void goya_restore_phase_topology(struct hl_device *hdev)
4229 {
4230
4231 }
4232
4233 static void goya_clear_sm_regs(struct hl_device *hdev)
4234 {
4235         int i, num_of_sob_in_longs, num_of_mon_in_longs;
4236
4237         num_of_sob_in_longs =
4238                 ((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4);
4239
4240         num_of_mon_in_longs =
4241                 ((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4);
4242
4243         for (i = 0 ; i < num_of_sob_in_longs ; i += 4)
4244                 WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0);
4245
4246         for (i = 0 ; i < num_of_mon_in_longs ; i += 4)
4247                 WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0);
4248
4249         /* Flush all WREG to prevent race */
4250         i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
4251 }
4252
4253 /*
4254  * goya_debugfs_read32 - read a 32bit value from a given device or a host mapped
4255  *                       address.
4256  *
4257  * @hdev:       pointer to hl_device structure
4258  * @addr:       device or host mapped address
4259  * @val:        returned value
4260  *
4261  * In case of a DDR address that is not mapped into the default aperture that
4262  * the DDR bar exposes, the function will configure the iATU so that the DDR
4263  * bar will be positioned at a base address that allows reading from the
4264  * required address. Configuring the iATU during normal operation can
4265  * lead to undefined behavior and therefore should be done with extreme care
4266  *
4267  */
4268 static int goya_debugfs_read32(struct hl_device *hdev, u64 addr,
4269                         bool user_address, u32 *val)
4270 {
4271         struct asic_fixed_properties *prop = &hdev->asic_prop;
4272         u64 ddr_bar_addr, host_phys_end;
4273         int rc = 0;
4274
4275         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
4276
4277         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4278                 *val = RREG32(addr - CFG_BASE);
4279
4280         } else if ((addr >= SRAM_BASE_ADDR) &&
4281                         (addr < SRAM_BASE_ADDR + SRAM_SIZE)) {
4282
4283                 *val = readl(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4284                                 (addr - SRAM_BASE_ADDR));
4285
4286         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4287
4288                 u64 bar_base_addr = DRAM_PHYS_BASE +
4289                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4290
4291                 ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
4292                 if (ddr_bar_addr != U64_MAX) {
4293                         *val = readl(hdev->pcie_bar[DDR_BAR_ID] +
4294                                                 (addr - bar_base_addr));
4295
4296                         ddr_bar_addr = goya_set_ddr_bar_base(hdev,
4297                                                         ddr_bar_addr);
4298                 }
4299                 if (ddr_bar_addr == U64_MAX)
4300                         rc = -EIO;
4301
4302         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
4303                         user_address && !iommu_present(&pci_bus_type)) {
4304                 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4305
4306         } else {
4307                 rc = -EFAULT;
4308         }
4309
4310         return rc;
4311 }
4312
4313 /*
4314  * goya_debugfs_write32 - write a 32bit value to a given device or a host mapped
4315  *                        address.
4316  *
4317  * @hdev:       pointer to hl_device structure
4318  * @addr:       device or host mapped address
4319  * @val:        value to write
4320  *
4321  * In case of a DDR address that is not mapped into the default aperture
4322  * that the DDR bar exposes, the function will configure the iATU so that
4323  * the DDR bar is positioned at a base address that allows writing to
4324  * the required address. Configuring the iATU during normal operation can
4325  * lead to undefined behavior and therefore should be done with extreme care.
4326  *
4327  */
4328 static int goya_debugfs_write32(struct hl_device *hdev, u64 addr,
4329                         bool user_address, u32 val)
4330 {
4331         struct asic_fixed_properties *prop = &hdev->asic_prop;
4332         u64 ddr_bar_addr, host_phys_end;
4333         int rc = 0;
4334
4335         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
4336
4337         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4338                 WREG32(addr - CFG_BASE, val);
4339
4340         } else if ((addr >= SRAM_BASE_ADDR) &&
4341                         (addr < SRAM_BASE_ADDR + SRAM_SIZE)) {
4342
4343                 writel(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4344                                         (addr - SRAM_BASE_ADDR));
4345
4346         } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4347
4348                 u64 bar_base_addr = DRAM_PHYS_BASE +
4349                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4350
4351                 ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
4352                 if (ddr_bar_addr != U64_MAX) {
4353                         writel(val, hdev->pcie_bar[DDR_BAR_ID] +
4354                                                 (addr - bar_base_addr));
4355
4356                         ddr_bar_addr = goya_set_ddr_bar_base(hdev,
4357                                                         ddr_bar_addr);
4358                 }
4359                 if (ddr_bar_addr == U64_MAX)
4360                         rc = -EIO;
4361
4362         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
4363                         user_address && !iommu_present(&pci_bus_type)) {
4364                 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4365
4366         } else {
4367                 rc = -EFAULT;
4368         }
4369
4370         return rc;
4371 }
4372
4373 static int goya_debugfs_read64(struct hl_device *hdev, u64 addr,
4374                         bool user_address, u64 *val)
4375 {
4376         struct asic_fixed_properties *prop = &hdev->asic_prop;
4377         u64 ddr_bar_addr, host_phys_end;
4378         int rc = 0;
4379
4380         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
4381
4382         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4383                 u32 val_l = RREG32(addr - CFG_BASE);
4384                 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4385
4386                 *val = (((u64) val_h) << 32) | val_l;
4387
4388         } else if ((addr >= SRAM_BASE_ADDR) &&
4389                         (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
4390
4391                 *val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4392                                 (addr - SRAM_BASE_ADDR));
4393
4394         } else if (addr <=
4395                    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4396
4397                 u64 bar_base_addr = DRAM_PHYS_BASE +
4398                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4399
4400                 ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
4401                 if (ddr_bar_addr != U64_MAX) {
4402                         *val = readq(hdev->pcie_bar[DDR_BAR_ID] +
4403                                                 (addr - bar_base_addr));
4404
4405                         ddr_bar_addr = goya_set_ddr_bar_base(hdev,
4406                                                         ddr_bar_addr);
4407                 }
4408                 if (ddr_bar_addr == U64_MAX)
4409                         rc = -EIO;
4410
4411         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
4412                         user_address && !iommu_present(&pci_bus_type)) {
4413                 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4414
4415         } else {
4416                 rc = -EFAULT;
4417         }
4418
4419         return rc;
4420 }
4421
4422 static int goya_debugfs_write64(struct hl_device *hdev, u64 addr,
4423                                 bool user_address, u64 val)
4424 {
4425         struct asic_fixed_properties *prop = &hdev->asic_prop;
4426         u64 ddr_bar_addr, host_phys_end;
4427         int rc = 0;
4428
4429         host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE;
4430
4431         if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4432                 WREG32(addr - CFG_BASE, lower_32_bits(val));
4433                 WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));
4434
4435         } else if ((addr >= SRAM_BASE_ADDR) &&
4436                         (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {
4437
4438                 writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4439                                         (addr - SRAM_BASE_ADDR));
4440
4441         } else if (addr <=
4442                    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4443
4444                 u64 bar_base_addr = DRAM_PHYS_BASE +
4445                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4446
4447                 ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
4448                 if (ddr_bar_addr != U64_MAX) {
4449                         writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
4450                                                 (addr - bar_base_addr));
4451
4452                         ddr_bar_addr = goya_set_ddr_bar_base(hdev,
4453                                                         ddr_bar_addr);
4454                 }
4455                 if (ddr_bar_addr == U64_MAX)
4456                         rc = -EIO;
4457
4458         } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end &&
4459                         user_address && !iommu_present(&pci_bus_type)) {
4460                 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4461
4462         } else {
4463                 rc = -EFAULT;
4464         }
4465
4466         return rc;
4467 }
4468
4469 static int goya_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
4470                                 void *blob_addr)
4471 {
4472         dev_err(hdev->dev, "Reading via DMA is not yet implemented\n");
4473         return -EPERM;
4474 }
4475
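/*
 * PTE accessors: the page tables live in DRAM, so reads and writes go
 * through the DDR BAR relative to its currently programmed base
 * (ddr_bar_cur_addr). Both helpers bail out while a hard reset is
 * pending, when the BAR mapping may no longer be valid.
 */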
4476 static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
4477 {
4478         struct goya_device *goya = hdev->asic_specific;
4479
4480         if (hdev->reset_info.hard_reset_pending)
4481                 return U64_MAX;
4482
4483         return readq(hdev->pcie_bar[DDR_BAR_ID] +
4484                         (addr - goya->ddr_bar_cur_addr));
4485 }
4486
4487 static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4488 {
4489         struct goya_device *goya = hdev->asic_specific;
4490
4491         if (hdev->reset_info.hard_reset_pending)
4492                 return;
4493
4494         writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
4495                         (addr - goya->ddr_bar_cur_addr));
4496 }
4497
4498 static const char *_goya_get_event_desc(u16 event_type)
4499 {
4500         switch (event_type) {
4501         case GOYA_ASYNC_EVENT_ID_PCIE_IF:
4502                 return "PCIe_if";
4503         case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4504         case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4505         case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4506         case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4507         case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4508         case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4509         case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4510         case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4511                 return "TPC%d_ecc";
4512         case GOYA_ASYNC_EVENT_ID_MME_ECC:
4513                 return "MME_ecc";
4514         case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
4515                 return "MME_ecc_ext";
4516         case GOYA_ASYNC_EVENT_ID_MMU_ECC:
4517                 return "MMU_ecc";
4518         case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
4519                 return "DMA_macro";
4520         case GOYA_ASYNC_EVENT_ID_DMA_ECC:
4521                 return "DMA_ecc";
4522         case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
4523                 return "CPU_if_ecc";
4524         case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
4525                 return "PSOC_mem";
4526         case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
4527                 return "PSOC_coresight";
4528         case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4529                 return "SRAM%d";
4530         case GOYA_ASYNC_EVENT_ID_GIC500:
4531                 return "GIC500";
4532         case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4533                 return "PLL%d";
4534         case GOYA_ASYNC_EVENT_ID_AXI_ECC:
4535                 return "AXI_ecc";
4536         case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
4537                 return "L2_ram_ecc";
4538         case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
4539                 return "PSOC_gpio_05_sw_reset";
4540         case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
4541                 return "PSOC_gpio_10_vrhot_icrit";
4542         case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4543                 return "PCIe_dec";
4544         case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4545         case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4546         case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4547         case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4548         case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4549         case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4550         case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4551         case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4552                 return "TPC%d_dec";
4553         case GOYA_ASYNC_EVENT_ID_MME_WACS:
4554                 return "MME_wacs";
4555         case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4556                 return "MME_wacsd";
4557         case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4558                 return "CPU_axi_splitter";
4559         case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4560                 return "PSOC_axi_dec";
4561         case GOYA_ASYNC_EVENT_ID_PSOC:
4562                 return "PSOC";
4563         case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4564         case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4565         case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4566         case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4567         case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4568         case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4569         case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4570         case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4571                 return "TPC%d_krn_err";
4572         case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
4573                 return "TPC%d_cq";
4574         case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4575                 return "TPC%d_qm";
4576         case GOYA_ASYNC_EVENT_ID_MME_QM:
4577                 return "MME_qm";
4578         case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4579                 return "MME_cq";
4580         case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4581                 return "DMA%d_qm";
4582         case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4583                 return "DMA%d_ch";
4584         case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4585         case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4586         case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4587         case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4588         case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4589         case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4590         case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4591         case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4592                 return "TPC%d_bmon_spmu";
4593         case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4594                 return "DMA_bm_ch%d";
4595         case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
4596                 return "POWER_ENV_S";
4597         case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
4598                 return "POWER_ENV_E";
4599         case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
4600                 return "THERMAL_ENV_S";
4601         case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
4602                 return "THERMAL_ENV_E";
4603         case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
4604                 return "QUEUE_OUT_OF_SYNC";
4605         default:
4606                 return "N/A";
4607         }
4608 }
4609
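/*
 * Format the event description into desc. For per-engine events the
 * template returned by _goya_get_event_desc() contains a %d, and the
 * engine index is recovered from the event ID; the divisors (3, 10)
 * apparently match the stride between consecutive engine event IDs in
 * each group.
 */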
4610 static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
4611 {
4612         u8 index;
4613
4614         switch (event_type) {
4615         case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4616         case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4617         case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4618         case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4619         case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4620         case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4621         case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4622         case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4623                 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
4624                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4625                 break;
4626         case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4627                 index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
4628                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4629                 break;
4630         case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4631                 index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
4632                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4633                 break;
4634         case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4635         case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4636         case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4637         case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4638         case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4639         case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4640         case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4641         case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4642                 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_DEC) / 3;
4643                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4644                 break;
4645         case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4646         case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4647         case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4648         case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4649         case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4650         case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4651         case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4652         case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4653                 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR) / 10;
4654                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4655                 break;
4656         case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
4657                 index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_CMDQ;
4658                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4659                 break;
4660         case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4661                 index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_QM;
4662                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4663                 break;
4664         case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4665                 index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM;
4666                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4667                 break;
4668         case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4669                 index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
4670                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4671                 break;
4672         case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4673         case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4674         case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4675         case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4676         case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4677         case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4678         case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4679         case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4680                 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
4681                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4682                 break;
4683         case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4684                 index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
4685                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4686                 break;
4687         case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
4688                 snprintf(desc, size, "%s", _goya_get_event_desc(event_type));
4689                 break;
4690         default:
4691                 snprintf(desc, size, "%s", _goya_get_event_desc(event_type));
4692                 break;
4693         }
4694 }
4695
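/*
 * Report illegal (RAZWI) transactions: check each of the four valid
 * bits (LBW/HBW x write/read), log a rate-limited error and clear the
 * bit so the next event can be captured.
 */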
4696 static void goya_print_razwi_info(struct hl_device *hdev)
4697 {
4698         if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
4699                 dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n");
4700                 WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
4701         }
4702
4703         if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
4704                 dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n");
4705                 WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
4706         }
4707
4708         if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
4709                 dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n");
4710                 WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
4711         }
4712
4713         if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
4714                 dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n");
4715                 WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
4716         }
4717 }
4718
4719 static void goya_print_mmu_error_info(struct hl_device *hdev)
4720 {
4721         struct goya_device *goya = hdev->asic_specific;
4722         u64 addr;
4723         u32 val;
4724
4725         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4726                 return;
4727
4728         val = RREG32(mmMMU_PAGE_ERROR_CAPTURE);
4729         if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
4730                 addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
4731                 addr <<= 32;
4732                 addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);
4733
4734                 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
4735                                         addr);
4736
4737                 WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
4738         }
4739 }
4740
4741 static void goya_print_out_of_sync_info(struct hl_device *hdev,
4742                                         struct cpucp_pkt_sync_err *sync_err)
4743 {
4744         struct hl_hw_queue *q = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
4745
4746         dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
4747                         sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
4748 }
4749
4750 static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
4751                                 bool razwi)
4752 {
4753         char desc[20] = "";
4754
4755         goya_get_event_desc(event_type, desc, sizeof(desc));
4756         dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
4757                 event_type, desc);
4758
4759         if (razwi) {
4760                 goya_print_razwi_info(hdev);
4761                 goya_print_mmu_error_info(hdev);
4762         }
4763 }
4764
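/*
 * Send a variable-length CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY packet:
 * a cpucp header followed by the IRQ numbers as __le32, with the total
 * size rounded up to an 8-byte multiple for the CPU-CP copy.
 */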
4765 static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
4766                 size_t irq_arr_size)
4767 {
4768         struct cpucp_unmask_irq_arr_packet *pkt;
4769         size_t total_pkt_size;
4770         u64 result;
4771         int rc;
4772         int irq_num_entries, irq_arr_index;
4773         __le32 *goya_irq_arr;
4774
4775         total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
4776                         irq_arr_size;
4777
4778         /* data should be aligned to 8 bytes in order for CPU-CP to copy it */
4779         total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
4780
4781         /* total_pkt_size is cast to u16 later on */
4782         if (total_pkt_size > USHRT_MAX) {
4783                 dev_err(hdev->dev, "too many elements in IRQ array\n");
4784                 return -EINVAL;
4785         }
4786
4787         pkt = kzalloc(total_pkt_size, GFP_KERNEL);
4788         if (!pkt)
4789                 return -ENOMEM;
4790
4791         irq_num_entries = irq_arr_size / sizeof(irq_arr[0]);
4792         pkt->length = cpu_to_le32(irq_num_entries);
4793
4794         /* We must perform any necessary endianness conversion on the irq
4795          * array being passed to the Goya hardware
4796          */
4797         for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs;
4798                         irq_arr_index < irq_num_entries ; irq_arr_index++)
4799                 goya_irq_arr[irq_arr_index] =
4800                                 cpu_to_le32(irq_arr[irq_arr_index]);
4801
4802         pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
4803                                                 CPUCP_PKT_CTL_OPCODE_SHIFT);
4804
4805         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
4806                                                 total_pkt_size, 0, &result);
4807
4808         if (rc)
4809                 dev_err(hdev->dev, "failed to unmask IRQ array\n");
4810
4811         kfree(pkt);
4812
4813         return rc;
4814 }
4815
4816 static int goya_non_hard_reset_late_init(struct hl_device *hdev)
4817 {
4818         /*
4819          * Unmask all IRQs since some could have been received
4820          * during the soft reset
4821          */
4822         return goya_unmask_irq_arr(hdev, goya_all_events,
4823                                         sizeof(goya_all_events));
4824 }
4825
4826 static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
4827 {
4828         struct cpucp_packet pkt;
4829         u64 result;
4830         int rc;
4831
4832         memset(&pkt, 0, sizeof(pkt));
4833
4834         pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
4835                                 CPUCP_PKT_CTL_OPCODE_SHIFT);
4836         pkt.value = cpu_to_le64(event_type);
4837
4838         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
4839                                                 0, &result);
4840
4841         if (rc)
4842                 dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d\n", event_type);
4843
4844         return rc;
4845 }
4846
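/*
 * Track clock throttling windows reported by the firmware. The _S
 * events open a power/thermal throttling interval (record the start
 * time, clear the end time) and the matching _E events close it; all
 * bookkeeping is done under the clk_throttling lock.
 */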
4847 static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
4848 {
4849         ktime_t zero_time = ktime_set(0, 0);
4850
4851         mutex_lock(&hdev->clk_throttling.lock);
4852
4853         switch (event_type) {
4854         case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
4855                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
4856                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
4857                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
4858                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
4859                 dev_info_ratelimited(hdev->dev,
4860                         "Clock throttling due to power consumption\n");
4861                 break;
4862
4863         case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
4864                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
4865                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
4866                 dev_info_ratelimited(hdev->dev,
4867                         "Power envelope is safe, back to optimal clock\n");
4868                 break;
4869
4870         case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
4871                 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
4872                 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
4873                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
4874                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
4875                 dev_info_ratelimited(hdev->dev,
4876                         "Clock throttling due to overheating\n");
4877                 break;
4878
4879         case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
4880                 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
4881                 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
4882                 dev_info_ratelimited(hdev->dev,
4883                         "Thermal envelope is safe, back to optimal clock\n");
4884                 break;
4885
4886         default:
4887                 dev_err(hdev->dev, "Received invalid clock change event %d\n",
4888                         event_type);
4889                 break;
4890         }
4891
4892         mutex_unlock(&hdev->clk_throttling.lock);
4893 }
4894
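/*
 * Event queue handler: validate the event type, bump the per-type
 * statistics and dispatch. ECC-class errors escalate to a hard reset
 * when hard_reset_on_fw_events is set; RAZWI-class and environment
 * events are logged and then unmasked again in the firmware.
 */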
4895 void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
4896 {
4897         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
4898         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
4899                                 >> EQ_CTL_EVENT_TYPE_SHIFT);
4900         struct goya_device *goya = hdev->asic_specific;
4901
4902         if (event_type >= GOYA_ASYNC_EVENT_ID_SIZE) {
4903                 dev_err(hdev->dev, "Event type %u exceeds maximum of %u\n",
4904                                 event_type, GOYA_ASYNC_EVENT_ID_SIZE - 1);
4905                 return;
4906         }
4907
4908         goya->events_stat[event_type]++;
4909         goya->events_stat_aggregate[event_type]++;
4910
4911         switch (event_type) {
4912         case GOYA_ASYNC_EVENT_ID_PCIE_IF:
4913         case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4914         case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4915         case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4916         case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4917         case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4918         case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4919         case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4920         case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4921         case GOYA_ASYNC_EVENT_ID_MME_ECC:
4922         case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
4923         case GOYA_ASYNC_EVENT_ID_MMU_ECC:
4924         case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
4925         case GOYA_ASYNC_EVENT_ID_DMA_ECC:
4926         case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
4927         case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
4928         case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
4929         case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4930         case GOYA_ASYNC_EVENT_ID_GIC500:
4931         case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4932         case GOYA_ASYNC_EVENT_ID_AXI_ECC:
4933         case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
4934                 goya_print_irq_info(hdev, event_type, false);
4935                 if (hdev->hard_reset_on_fw_events)
4936                         hl_device_reset(hdev, (HL_DRV_RESET_HARD |
4937                                                 HL_DRV_RESET_FW_FATAL_ERR));
4938                 break;
4939
4940         case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
4941                 goya_print_irq_info(hdev, event_type, false);
4942                 if (hdev->hard_reset_on_fw_events)
4943                         hl_device_reset(hdev, HL_DRV_RESET_HARD);
4944                 break;
4945
4946         case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4947         case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4948         case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4949         case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4950         case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4951         case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4952         case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4953         case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4954         case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4955         case GOYA_ASYNC_EVENT_ID_MME_WACS:
4956         case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4957         case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4958         case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4959         case GOYA_ASYNC_EVENT_ID_PSOC:
4960         case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4961         case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4962         case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4963         case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4964         case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4965         case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4966         case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4967         case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4968         case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4969         case GOYA_ASYNC_EVENT_ID_MME_QM:
4970         case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4971         case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4972         case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4973                 goya_print_irq_info(hdev, event_type, true);
4974                 goya_unmask_irq(hdev, event_type);
4975                 break;
4976
4977         case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
4978         case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4979         case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4980         case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4981         case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4982         case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4983         case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4984         case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4985         case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4986         case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4987                 goya_print_irq_info(hdev, event_type, false);
4988                 goya_unmask_irq(hdev, event_type);
4989                 break;
4990
4991         case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
4992         case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
4993         case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
4994         case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
4995                 goya_print_clk_change_info(hdev, event_type);
4996                 goya_unmask_irq(hdev, event_type);
4997                 break;
4998
4999         case GOYA_ASYNC_EVENT_PKT_QUEUE_OUT_SYNC:
5000                 goya_print_irq_info(hdev, event_type, false);
5001                 goya_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
5002                 if (hdev->hard_reset_on_fw_events)
5003                         hl_device_reset(hdev, HL_DRV_RESET_HARD);
5004                 else
5005                         hl_fw_unmask_irq(hdev, event_type);
5006                 break;
5007
5008         default:
5009                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
5010                                 event_type);
5011                 break;
5012         }
5013 }
5014
5015 void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
5016 {
5017         struct goya_device *goya = hdev->asic_specific;
5018
5019         if (aggregate) {
5020                 *size = (u32) sizeof(goya->events_stat_aggregate);
5021                 return goya->events_stat_aggregate;
5022         }
5023
5024         *size = (u32) sizeof(goya->events_stat);
5025         return goya->events_stat;
5026 }
5027
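/*
 * Fill a device memory region with a 64-bit pattern by building a
 * kernel CB of LIN_DMA packets in memset mode, one packet per 2GB
 * chunk, and sending it as a single job on DMA queue 0.
 */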
5028 static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
5029                                 u64 val, bool is_dram)
5030 {
5031         struct packet_lin_dma *lin_dma_pkt;
5032         struct hl_cs_job *job;
5033         u32 cb_size, ctl;
5034         struct hl_cb *cb;
5035         int rc, lin_dma_pkts_cnt;
5036
5037         lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
5038         cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
5039                                                 sizeof(struct packet_msg_prot);
5040         cb = hl_cb_kernel_create(hdev, cb_size, false);
5041         if (!cb)
5042                 return -ENOMEM;
5043
5044         lin_dma_pkt = cb->kernel_address;
5045
5046         do {
5047                 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5048
5049                 ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
5050                                 (1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
5051                                 (1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
5052                                 (1 << GOYA_PKT_CTL_RB_SHIFT) |
5053                                 (1 << GOYA_PKT_CTL_MB_SHIFT));
5054                 ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
5055                                 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
5056                 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5057
5058                 lin_dma_pkt->src_addr = cpu_to_le64(val);
5059                 lin_dma_pkt->dst_addr = cpu_to_le64(addr);
5060                 if (lin_dma_pkts_cnt > 1)
5061                         lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
5062                 else
5063                         lin_dma_pkt->tsize = cpu_to_le32(size);
5064
5065                 size -= SZ_2G;
5066                 addr += SZ_2G;
5067                 lin_dma_pkt++;
5068         } while (--lin_dma_pkts_cnt);
5069
5070         job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5071         if (!job) {
5072                 dev_err(hdev->dev, "Failed to allocate a new job\n");
5073                 rc = -ENOMEM;
5074                 goto release_cb;
5075         }
5076
5077         job->id = 0;
5078         job->user_cb = cb;
5079         atomic_inc(&job->user_cb->cs_cnt);
5080         job->user_cb_size = cb_size;
5081         job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
5082         job->patched_cb = job->user_cb;
5083         job->job_cb_size = job->user_cb_size;
5084
5085         hl_debugfs_add_job(hdev, job);
5086
5087         rc = goya_send_job_on_qman0(hdev, job);
5088
5089         hl_debugfs_remove_job(hdev, job);
5090         kfree(job);
5091         atomic_dec(&cb->cs_cnt);
5092
5093 release_cb:
5094         hl_cb_put(cb);
5095         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
5096
5097         return rc;
5098 }
5099
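/*
 * Prepare the device for a new user context: scrub SRAM with a
 * recognizable 0x77 pattern (a reduced size on PLDM), point every DMA
 * channel's write-completion address back at its designated sync
 * object, restore the TPC clock-relax register and clear the sync
 * manager registers the previous user may have changed.
 */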
5100 int goya_context_switch(struct hl_device *hdev, u32 asid)
5101 {
5102         struct asic_fixed_properties *prop = &hdev->asic_prop;
5103         u64 addr = prop->sram_base_address, sob_addr;
5104         u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
5105         u64 val = 0x7777777777777777ull;
5106         int rc, dma_id;
5107         u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
5108                                         mmDMA_CH_0_WR_COMP_ADDR_LO;
5109
5110         rc = goya_memset_device_memory(hdev, addr, size, val, false);
5111         if (rc) {
5112                 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
5113                 return rc;
5114         }
5115
5116         /* we need to reset registers that the user is allowed to change */
5117         sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
5118         WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));
5119
5120         for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
5121                 sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
5122                                                         (dma_id - 1) * 4;
5123                 WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
5124                                                 lower_32_bits(sob_addr));
5125         }
5126
5127         WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
5128
5129         goya_clear_sm_regs(hdev);
5130
5131         return 0;
5132 }
5133
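/*
 * Zero the MMU page-table region in DRAM, including the DRAM default
 * page and the MMU cache management area that follow it.
 */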
5134 static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
5135 {
5136         struct asic_fixed_properties *prop = &hdev->asic_prop;
5137         struct goya_device *goya = hdev->asic_specific;
5138         u64 addr = prop->mmu_pgt_addr;
5139         u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
5140                         MMU_CACHE_MNG_SIZE;
5141
5142         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
5143                 return 0;
5144
5145         return goya_memset_device_memory(hdev, addr, size, 0, true);
5146 }
5147
5148 static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
5149 {
5150         struct goya_device *goya = hdev->asic_specific;
5151         u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
5152         u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE;
5153         u64 val = 0x9999999999999999ull;
5154
5155         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
5156                 return 0;
5157
5158         return goya_memset_device_memory(hdev, addr, size, val, true);
5159 }
5160
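/*
 * Map, in the kernel context, the regions the device CPU needs: the
 * CPU FW image is identity-mapped with 2MB pages, and the CPU
 * accessible DMA region is mapped at VA_CPU_ACCESSIBLE_MEM_ADDR with a
 * single 2MB page when suitably aligned, otherwise with 4KB pages.
 * On any failure, everything mapped so far is unwound.
 */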
5161 static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
5162 {
5163         struct asic_fixed_properties *prop = &hdev->asic_prop;
5164         struct goya_device *goya = hdev->asic_specific;
5165         s64 off, cpu_off;
5166         int rc;
5167
5168         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
5169                 return 0;
5170
5171         for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
5172                 rc = hl_mmu_map_page(hdev->kernel_ctx,
5173                         prop->dram_base_address + off,
5174                         prop->dram_base_address + off, PAGE_SIZE_2MB,
5175                         (off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
5176                 if (rc) {
5177                         dev_err(hdev->dev, "Map failed for address 0x%llx\n",
5178                                 prop->dram_base_address + off);
5179                         goto unmap;
5180                 }
5181         }
5182
5183         if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
5184                 rc = hl_mmu_map_page(hdev->kernel_ctx,
5185                         VA_CPU_ACCESSIBLE_MEM_ADDR,
5186                         hdev->cpu_accessible_dma_address,
5187                         PAGE_SIZE_2MB, true);
5188
5189                 if (rc) {
5190                         dev_err(hdev->dev,
5191                                 "Map failed for CPU accessible memory\n");
5192                         off -= PAGE_SIZE_2MB;
5193                         goto unmap;
5194                 }
5195         } else {
5196                 for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
5197                         rc = hl_mmu_map_page(hdev->kernel_ctx,
5198                                 VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
5199                                 hdev->cpu_accessible_dma_address + cpu_off,
5200                                 PAGE_SIZE_4KB, true);
5201                         if (rc) {
5202                                 dev_err(hdev->dev,
5203                                         "Map failed for CPU accessible memory\n");
5204                                 cpu_off -= PAGE_SIZE_4KB;
5205                                 goto unmap_cpu;
5206                         }
5207                 }
5208         }
5209
5210         goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
5211         goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
5212         WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
5213         WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);
5214
5215         /* Make sure configuration is flushed to device */
5216         RREG32(mmCPU_IF_AWUSER_OVR_EN);
5217
5218         goya->device_cpu_mmu_mappings_done = true;
5219
5220         return 0;
5221
5222 unmap_cpu:
5223         for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
5224                 if (hl_mmu_unmap_page(hdev->kernel_ctx,
5225                                 VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
5226                                 PAGE_SIZE_4KB, true))
5227                         dev_warn_ratelimited(hdev->dev,
5228                                 "failed to unmap address 0x%llx\n",
5229                                 VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
5230 unmap:
5231         for (; off >= 0 ; off -= PAGE_SIZE_2MB)
5232                 if (hl_mmu_unmap_page(hdev->kernel_ctx,
5233                                 prop->dram_base_address + off, PAGE_SIZE_2MB,
5234                                 true))
5235                         dev_warn_ratelimited(hdev->dev,
5236                                 "failed to unmap address 0x%llx\n",
5237                                 prop->dram_base_address + off);
5238
5239         return rc;
5240 }
5241
5242 void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
5243 {
5244         struct asic_fixed_properties *prop = &hdev->asic_prop;
5245         struct goya_device *goya = hdev->asic_specific;
5246         u32 off, cpu_off;
5247
5248         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
5249                 return;
5250
5251         if (!goya->device_cpu_mmu_mappings_done)
5252                 return;
5253
5254         WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
5255         WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);
5256
5257         if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
5258                 if (hl_mmu_unmap_page(hdev->kernel_ctx,
5259                                 VA_CPU_ACCESSIBLE_MEM_ADDR,
5260                                 PAGE_SIZE_2MB, true))
5261                         dev_warn(hdev->dev,
5262                                 "Failed to unmap CPU accessible memory\n");
5263         } else {
5264                 for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
5265                         if (hl_mmu_unmap_page(hdev->kernel_ctx,
5266                                         VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
5267                                         PAGE_SIZE_4KB,
5268                                         (cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
5269                                 dev_warn_ratelimited(hdev->dev,
5270                                         "failed to unmap address 0x%llx\n",
5271                                         VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
5272         }
5273
5274         for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
5275                 if (hl_mmu_unmap_page(hdev->kernel_ctx,
5276                                 prop->dram_base_address + off, PAGE_SIZE_2MB,
5277                                 (off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
5278                         dev_warn_ratelimited(hdev->dev,
5279                                         "Failed to unmap address 0x%llx\n",
5280                                         prop->dram_base_address + off);
5281
5282         goya->device_cpu_mmu_mappings_done = false;
5283 }
5284
5285 static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
5286 {
5287         struct goya_device *goya = hdev->asic_specific;
5288         int i;
5289
5290         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
5291                 return;
5292
5293         if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
5294                 dev_crit(hdev->dev, "asid %u is too big\n", asid);
5295                 return;
5296         }
5297
5298         /* zero the MMBP and ASID bits and then set the ASID */
5299         for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
5300                 goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
5301 }
5302
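/*
 * Invalidate the MMU cache by kicking STLB_INV_ALL_START and polling
 * until the hardware clears it, with a longer timeout on PLDM.
 */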
5303 static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
5304                                         u32 flags)
5305 {
5306         struct goya_device *goya = hdev->asic_specific;
5307         u32 status, timeout_usec;
5308         int rc;
5309
5310         if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
5311                 hdev->reset_info.hard_reset_pending)
5312                 return 0;
5313
5314         /* no need for L1-only invalidation in Goya */
5315         if (!is_hard)
5316                 return 0;
5317
5318         if (hdev->pldm)
5319                 timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
5320         else
5321                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5322
5323         /* L0 & L1 invalidation */
5324         WREG32(mmSTLB_INV_ALL_START, 1);
5325
5326         rc = hl_poll_timeout(
5327                 hdev,
5328                 mmSTLB_INV_ALL_START,
5329                 status,
5330                 !status,
5331                 1000,
5332                 timeout_usec);
5333
5334         return rc;
5335 }
5336
5337 static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
5338                                                 bool is_hard, u32 flags,
5339                                                 u32 asid, u64 va, u64 size)
5340 {
5341         /* Treat as invalidate all because there is no range invalidation
5342          * in Goya
5343          */
5344         return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
5345 }
5346
5347 int goya_send_heartbeat(struct hl_device *hdev)
5348 {
5349         struct goya_device *goya = hdev->asic_specific;
5350
5351         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5352                 return 0;
5353
5354         return hl_fw_send_heartbeat(hdev);
5355 }
5356
5357 int goya_cpucp_info_get(struct hl_device *hdev)
5358 {
5359         struct goya_device *goya = hdev->asic_specific;
5360         struct asic_fixed_properties *prop = &hdev->asic_prop;
5361         u64 dram_size;
5362         int rc;
5363
5364         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5365                 return 0;
5366
5367         rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
5368                                         mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
5369                                         mmCPU_BOOT_ERR1);
5370         if (rc)
5371                 return rc;
5372
5373         dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
5374         if (dram_size) {
5375                 if ((!is_power_of_2(dram_size)) ||
5376                                 (dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
5377                         dev_err(hdev->dev,
5378                                 "F/W reported invalid DRAM size %llu. Trying to use default size\n",
5379                                 dram_size);
5380                         dram_size = DRAM_PHYS_DEFAULT_SIZE;
5381                 }
5382
5383                 prop->dram_size = dram_size;
5384                 prop->dram_end_address = prop->dram_base_address + dram_size;
5385         }
5386
5387         if (!strlen(prop->cpucp_info.card_name))
5388                 strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
5389                                 CARD_NAME_MAX_LEN);
5390
5391         return 0;
5392 }
5393
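/*
 * Check every DMA, TPC and MME engine for idleness using its QM/CMDQ
 * and core status registers. Busy engines are marked in the caller's
 * bitmask, and when a seq_file is supplied a per-engine status table
 * is printed as well.
 */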
5394 static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
5395                                         u8 mask_len, struct seq_file *s)
5396 {
5397         const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
5398         const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
5399         unsigned long *mask = (unsigned long *)mask_arr;
5400         u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
5401                 mme_arch_sts;
5402         bool is_idle = true, is_eng_idle;
5403         u64 offset;
5404         int i;
5405
5406         if (s)
5407                 seq_puts(s, "\nDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0\n"
5408                                 "---  -------  ------------  -------------\n");
5409
5410         offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;
5411
5412         for (i = 0 ; i < DMA_MAX_NUM ; i++) {
5413                 qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
5414                 dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
5415                 is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
5416                                 IS_DMA_IDLE(dma_core_sts0);
5417                 is_idle &= is_eng_idle;
5418
5419                 if (mask && !is_eng_idle)
5420                         set_bit(GOYA_ENGINE_ID_DMA_0 + i, mask);
5421                 if (s)
5422                         seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
5423                                         qm_glbl_sts0, dma_core_sts0);
5424         }
5425
5426         if (s)
5427                 seq_puts(s,
5428                         "\nTPC  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  CFG_STATUS\n"
5429                         "---  -------  ------------  --------------  ----------\n");
5430
5431         offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;
5432
5433         for (i = 0 ; i < TPC_MAX_NUM ; i++) {
5434                 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
5435                 cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
5436                 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
5437                 is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
5438                                 IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
5439                                 IS_TPC_IDLE(tpc_cfg_sts);
5440                 is_idle &= is_eng_idle;
5441
5442                 if (mask && !is_eng_idle)
5443                         set_bit(GOYA_ENGINE_ID_TPC_0 + i, mask);
5444                 if (s)
5445                         seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
5446                                 qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
5447         }
5448
5449         if (s)
5450                 seq_puts(s,
5451                         "\nMME  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  ARCH_STATUS\n"
5452                         "---  -------  ------------  --------------  -----------\n");
5453
5454         qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
5455         cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
5456         mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
5457         is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
5458                         IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
5459                         IS_MME_IDLE(mme_arch_sts);
5460         is_idle &= is_eng_idle;
5461
5462         if (mask && !is_eng_idle)
5463                 set_bit(GOYA_ENGINE_ID_MME_0, mask);
5464         if (s) {
5465                 seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
5466                                 cmdq_glbl_sts0, mme_arch_sts);
5467                 seq_puts(s, "\n");
5468         }
5469
5470         return is_idle;
5471 }
5472
5473 static void goya_hw_queues_lock(struct hl_device *hdev)
5474         __acquires(&goya->hw_queues_lock)
5475 {
5476         struct goya_device *goya = hdev->asic_specific;
5477
5478         spin_lock(&goya->hw_queues_lock);
5479 }
5480
5481 static void goya_hw_queues_unlock(struct hl_device *hdev)
5482         __releases(&goya->hw_queues_lock)
5483 {
5484         struct goya_device *goya = hdev->asic_specific;
5485
5486         spin_unlock(&goya->hw_queues_lock);
5487 }
5488
5489 static u32 goya_get_pci_id(struct hl_device *hdev)
5490 {
5491         return hdev->pdev->device;
5492 }
5493
5494 static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
5495                                 size_t max_size)
5496 {
5497         struct goya_device *goya = hdev->asic_specific;
5498
5499         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5500                 return 0;
5501
5502         return hl_fw_get_eeprom_data(hdev, data, max_size);
5503 }
5504
5505 static void goya_cpu_init_scrambler_dram(struct hl_device *hdev)
5506 {
5507
5508 }
5509
5510 static int goya_ctx_init(struct hl_ctx *ctx)
5511 {
5512         if (ctx->asid != HL_KERNEL_ASID_ID)
5513                 goya_mmu_prepare(ctx->hdev, ctx->asid);
5514
5515         return 0;
5516 }
5517
5518 u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
5519 {
5520         return cq_idx;
5521 }
5522
5523 static u32 goya_get_signal_cb_size(struct hl_device *hdev)
5524 {
5525         return 0;
5526 }
5527
5528 static u32 goya_get_wait_cb_size(struct hl_device *hdev)
5529 {
5530         return 0;
5531 }
5532
5533 static u32 goya_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
5534                                 u32 size, bool eb)
5535 {
5536         return 0;
5537 }
5538
5539 static u32 goya_gen_wait_cb(struct hl_device *hdev,
5540                 struct hl_gen_wait_properties *prop)
5541 {
5542         return 0;
5543 }
5544
5545 static void goya_reset_sob(struct hl_device *hdev, void *data)
5546 {
5547
5548 }
5549
5550 static void goya_reset_sob_group(struct hl_device *hdev, u16 sob_group)
5551 {
5552
5553 }
5554
5555 static void goya_set_dma_mask_from_fw(struct hl_device *hdev)
5556 {
5557         hdev->dma_mask = 48;
5558 }
5559
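/*
 * Compose the 64-bit device timestamp from the upper and lower halves
 * of the PSOC timestamp counter. Note the two halves are read
 * non-atomically (upper first), so a rollover between the reads could
 * presumably skew the result.
 */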
5560 u64 goya_get_device_time(struct hl_device *hdev)
5561 {
5562         u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
5563
5564         return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
5565 }
5566
5567 static int goya_collective_wait_init_cs(struct hl_cs *cs)
5568 {
5569         return 0;
5570 }
5571
5572 static int goya_collective_wait_create_jobs(struct hl_device *hdev,
5573                 struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
5574                 u32 collective_engine_id, u32 encaps_signal_offset)
5575 {
5576         return -EINVAL;
5577 }
5578
5579 static void goya_ctx_fini(struct hl_ctx *ctx)
5580 {
5581
5582 }
5583
5584 static int goya_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
5585                         u32 *block_size, u32 *block_id)
5586 {
5587         return -EPERM;
5588 }
5589
5590 static int goya_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
5591                                 u32 block_id, u32 block_size)
5592 {
5593         return -EPERM;
5594 }
5595
5596 static void goya_enable_events_from_fw(struct hl_device *hdev)
5597 {
5598         WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
5599                         GOYA_ASYNC_EVENT_ID_INTS_REGISTER);
5600 }
5601
static int goya_map_pll_idx_to_fw_idx(u32 pll_idx)
{
        switch (pll_idx) {
        case HL_GOYA_CPU_PLL: return CPU_PLL;
        case HL_GOYA_PCI_PLL: return PCI_PLL;
        case HL_GOYA_MME_PLL: return MME_PLL;
        case HL_GOYA_TPC_PLL: return TPC_PLL;
        case HL_GOYA_IC_PLL: return IC_PLL;
        case HL_GOYA_MC_PLL: return MC_PLL;
        case HL_GOYA_EMMC_PLL: return EMMC_PLL;
        default: return -EINVAL;
        }
}

static int goya_gen_sync_to_engine_map(struct hl_device *hdev,
                                struct hl_sync_to_engine_map *map)
{
        /* Not implemented */
        return 0;
}

static int goya_monitor_valid(struct hl_mon_state_dump *mon)
{
        /* Not implemented */
        return 0;
}

static int goya_print_single_monitor(char **buf, size_t *size, size_t *offset,
                                struct hl_device *hdev,
                                struct hl_mon_state_dump *mon)
{
        /* Not implemented */
        return 0;
}

static int goya_print_fences_single_engine(
        struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
        enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
        size_t *size, size_t *offset)
{
        /* Not implemented */
        return 0;
}

static struct hl_state_dump_specs_funcs goya_state_dump_funcs = {
        .monitor_valid = goya_monitor_valid,
        .print_single_monitor = goya_print_single_monitor,
        .gen_sync_to_engine_map = goya_gen_sync_to_engine_map,
        .print_fences_single_engine = goya_print_fences_single_engine,
};

static void goya_state_dump_init(struct hl_device *hdev)
{
        /* Not fully implemented: the dump callbacks above are stubs */
        hdev->state_dump_specs.props = goya_state_dump_specs_props;
        hdev->state_dump_specs.funcs = goya_state_dump_funcs;
}

static u32 goya_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
        return 0;
}

static u32 *goya_get_stream_master_qid_arr(void)
{
        return NULL;
}

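/*
 * ASIC function table: the common habanalabs core never calls Goya code
 * directly; everything is dispatched through these hooks, e.g.:
 *
 *	hdev->asic_funcs->hw_queues_lock(hdev);
 */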
static const struct hl_asic_funcs goya_funcs = {
        .early_init = goya_early_init,
        .early_fini = goya_early_fini,
        .late_init = goya_late_init,
        .late_fini = goya_late_fini,
        .sw_init = goya_sw_init,
        .sw_fini = goya_sw_fini,
        .hw_init = goya_hw_init,
        .hw_fini = goya_hw_fini,
        .halt_engines = goya_halt_engines,
        .suspend = goya_suspend,
        .resume = goya_resume,
        .mmap = goya_mmap,
        .ring_doorbell = goya_ring_doorbell,
        .pqe_write = goya_pqe_write,
        .asic_dma_alloc_coherent = goya_dma_alloc_coherent,
        .asic_dma_free_coherent = goya_dma_free_coherent,
        .scrub_device_mem = goya_scrub_device_mem,
        .get_int_queue_base = goya_get_int_queue_base,
        .test_queues = goya_test_queues,
        .asic_dma_pool_zalloc = goya_dma_pool_zalloc,
        .asic_dma_pool_free = goya_dma_pool_free,
        .cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
        .cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
        .hl_dma_unmap_sg = goya_dma_unmap_sg,
        .cs_parser = goya_cs_parser,
        .asic_dma_map_sg = goya_dma_map_sg,
        .get_dma_desc_list_size = goya_get_dma_desc_list_size,
        .add_end_of_cb_packets = goya_add_end_of_cb_packets,
        .update_eq_ci = goya_update_eq_ci,
        .context_switch = goya_context_switch,
        .restore_phase_topology = goya_restore_phase_topology,
        .debugfs_read32 = goya_debugfs_read32,
        .debugfs_write32 = goya_debugfs_write32,
        .debugfs_read64 = goya_debugfs_read64,
        .debugfs_write64 = goya_debugfs_write64,
        .debugfs_read_dma = goya_debugfs_read_dma,
        .add_device_attr = goya_add_device_attr,
        .handle_eqe = goya_handle_eqe,
        .get_events_stat = goya_get_events_stat,
        .read_pte = goya_read_pte,
        .write_pte = goya_write_pte,
        .mmu_invalidate_cache = goya_mmu_invalidate_cache,
        .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
        .send_heartbeat = goya_send_heartbeat,
        .debug_coresight = goya_debug_coresight,
        .is_device_idle = goya_is_device_idle,
        .non_hard_reset_late_init = goya_non_hard_reset_late_init,
        .hw_queues_lock = goya_hw_queues_lock,
        .hw_queues_unlock = goya_hw_queues_unlock,
        .get_pci_id = goya_get_pci_id,
        .get_eeprom_data = goya_get_eeprom_data,
        .send_cpu_message = goya_send_cpu_message,
        .pci_bars_map = goya_pci_bars_map,
        .init_iatu = goya_init_iatu,
        .rreg = hl_rreg,
        .wreg = hl_wreg,
        .halt_coresight = goya_halt_coresight,
        .ctx_init = goya_ctx_init,
        .ctx_fini = goya_ctx_fini,
        .get_queue_id_for_cq = goya_get_queue_id_for_cq,
        .load_firmware_to_device = goya_load_firmware_to_device,
        .load_boot_fit_to_device = goya_load_boot_fit_to_device,
        .get_signal_cb_size = goya_get_signal_cb_size,
        .get_wait_cb_size = goya_get_wait_cb_size,
        .gen_signal_cb = goya_gen_signal_cb,
        .gen_wait_cb = goya_gen_wait_cb,
        .reset_sob = goya_reset_sob,
        .reset_sob_group = goya_reset_sob_group,
        .set_dma_mask_from_fw = goya_set_dma_mask_from_fw,
        .get_device_time = goya_get_device_time,
        .collective_wait_init_cs = goya_collective_wait_init_cs,
        .collective_wait_create_jobs = goya_collective_wait_create_jobs,
        .scramble_addr = hl_mmu_scramble_addr,
        .descramble_addr = hl_mmu_descramble_addr,
        .ack_protection_bits_errors = goya_ack_protection_bits_errors,
        .get_hw_block_id = goya_get_hw_block_id,
        .hw_block_mmap = goya_block_mmap,
        .enable_events_from_fw = goya_enable_events_from_fw,
        .map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx,
        .init_firmware_loader = goya_init_firmware_loader,
        .init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram,
        .state_dump_init = goya_state_dump_init,
        .get_sob_addr = goya_get_sob_addr,
        .set_pci_memory_regions = goya_set_pci_memory_regions,
        .get_stream_master_qid_arr = goya_get_stream_master_qid_arr,
};

/*
 * goya_set_asic_funcs - set Goya function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void goya_set_asic_funcs(struct hl_device *hdev)
{
        hdev->asic_funcs = &goya_funcs;
}