Merge tag 'armsoc-soc' of git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc
[linux-2.6-microblaze.git] / drivers / misc / habanalabs / goya / goya.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4  * Copyright 2016-2019 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7
8 #include "goyaP.h"
9 #include "include/hw_ip/mmu/mmu_general.h"
10 #include "include/hw_ip/mmu/mmu_v1_0.h"
11 #include "include/goya/asic_reg/goya_masks.h"
12
13 #include <linux/pci.h>
14 #include <linux/genalloc.h>
15 #include <linux/hwmon.h>
16 #include <linux/io-64-nonatomic-lo-hi.h>
17 #include <linux/iommu.h>
18 #include <linux/seq_file.h>
19
20 /*
21  * GOYA security scheme:
22  *
23  * 1. Host is protected by:
24  *        - Range registers (When MMU is enabled, DMA RR does NOT protect host)
25  *        - MMU
26  *
27  * 2. DRAM is protected by:
28  *        - Range registers (protect the first 512MB)
29  *        - MMU (isolation between users)
30  *
31  * 3. Configuration is protected by:
32  *        - Range registers
33  *        - Protection bits
34  *
35  * When MMU is disabled:
36  *
37  * QMAN DMA: PQ, CQ, CP, DMA are secured.
38  * PQ, CB and the data are on the host.
39  *
40  * QMAN TPC/MME:
41  * PQ, CQ and CP are not secured.
42  * PQ, CB and the data are on the SRAM/DRAM.
43  *
44  * Since QMAN DMA is secured, KMD is parsing the DMA CB:
45  *     - KMD checks DMA pointer
46  *     - WREG, MSG_PROT are not allowed.
47  *     - MSG_LONG/SHORT are allowed.
48  *
49  * A read/write transaction by the QMAN to a protected area will succeed if
50  * and only if the QMAN's CP is secured and MSG_PROT is used
51  *
52  *
53  * When MMU is enabled:
54  *
55  * QMAN DMA: PQ, CQ and CP are secured.
56  * MMU is set to bypass on the Secure props register of the QMAN.
57  * The reasons we don't enable MMU for PQ, CQ and CP are:
58  *     - PQ entry is in kernel address space and KMD doesn't map it.
59  *     - CP writes to MSIX register and to kernel address space (completion
60  *       queue).
61  *
62  * DMA is not secured but because CP is secured, KMD still needs to parse the
63  * CB, but doesn't need to check the DMA addresses.
64  *
65  * For QMAN DMA 0, DMA is also secured because only KMD uses this DMA and KMD
66  * doesn't map memory in MMU.
67  *
68  * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode)
69  *
70  * DMA RR does NOT protect host because DMA is not secured
71  *
72  */
73
74 #define GOYA_MMU_REGS_NUM               63
75
76 #define GOYA_DMA_POOL_BLK_SIZE          0x100           /* 256 bytes */
77
78 #define GOYA_RESET_TIMEOUT_MSEC         500             /* 500ms */
79 #define GOYA_PLDM_RESET_TIMEOUT_MSEC    20000           /* 20s */
80 #define GOYA_RESET_WAIT_MSEC            1               /* 1ms */
81 #define GOYA_CPU_RESET_WAIT_MSEC        100             /* 100ms */
82 #define GOYA_PLDM_RESET_WAIT_MSEC       1000            /* 1s */
83 #define GOYA_TEST_QUEUE_WAIT_USEC       100000          /* 100ms */
84 #define GOYA_PLDM_MMU_TIMEOUT_USEC      (MMU_CONFIG_TIMEOUT_USEC * 100)
85 #define GOYA_PLDM_QMAN0_TIMEOUT_USEC    (HL_DEVICE_TIMEOUT_USEC * 30)
86
87 #define GOYA_QMAN0_FENCE_VAL            0xD169B243
88
89 #define GOYA_MAX_STRING_LEN             20
90
91 #define GOYA_CB_POOL_CB_CNT             512
92 #define GOYA_CB_POOL_CB_SIZE            0x20000         /* 128KB */
93
94 #define IS_QM_IDLE(engine, qm_glbl_sts0) \
95         (((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
96 #define IS_DMA_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(DMA, qm_glbl_sts0)
97 #define IS_TPC_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(TPC, qm_glbl_sts0)
98 #define IS_MME_QM_IDLE(qm_glbl_sts0)    IS_QM_IDLE(MME, qm_glbl_sts0)
99
100 #define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
101         (((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
102                         engine##_CMDQ_IDLE_MASK)
103 #define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
104         IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
105 #define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
106         IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)
107
108 #define IS_DMA_IDLE(dma_core_sts0) \
109         !((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)
110
111 #define IS_TPC_IDLE(tpc_cfg_sts) \
112         (((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)
113
114 #define IS_MME_IDLE(mme_arch_sts) \
115         (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
116
117
118 static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
119                 "goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
120                 "goya cq 4", "goya cpu eq"
121 };
122
123 static u16 goya_packet_sizes[MAX_PACKET_ID] = {
124         [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
125         [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
126         [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
127         [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
128         [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
129         [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
130         [PACKET_FENCE]          = sizeof(struct packet_fence),
131         [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
132         [PACKET_NOP]            = sizeof(struct packet_nop),
133         [PACKET_STOP]           = sizeof(struct packet_stop)
134 };
135
136 static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
137         mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
138         mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
139         mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
140         mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
141         mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
142         mmTPC0_QM_GLBL_SECURE_PROPS,
143         mmTPC0_QM_GLBL_NON_SECURE_PROPS,
144         mmTPC0_CMDQ_GLBL_SECURE_PROPS,
145         mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
146         mmTPC0_CFG_ARUSER,
147         mmTPC0_CFG_AWUSER,
148         mmTPC1_QM_GLBL_SECURE_PROPS,
149         mmTPC1_QM_GLBL_NON_SECURE_PROPS,
150         mmTPC1_CMDQ_GLBL_SECURE_PROPS,
151         mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
152         mmTPC1_CFG_ARUSER,
153         mmTPC1_CFG_AWUSER,
154         mmTPC2_QM_GLBL_SECURE_PROPS,
155         mmTPC2_QM_GLBL_NON_SECURE_PROPS,
156         mmTPC2_CMDQ_GLBL_SECURE_PROPS,
157         mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
158         mmTPC2_CFG_ARUSER,
159         mmTPC2_CFG_AWUSER,
160         mmTPC3_QM_GLBL_SECURE_PROPS,
161         mmTPC3_QM_GLBL_NON_SECURE_PROPS,
162         mmTPC3_CMDQ_GLBL_SECURE_PROPS,
163         mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
164         mmTPC3_CFG_ARUSER,
165         mmTPC3_CFG_AWUSER,
166         mmTPC4_QM_GLBL_SECURE_PROPS,
167         mmTPC4_QM_GLBL_NON_SECURE_PROPS,
168         mmTPC4_CMDQ_GLBL_SECURE_PROPS,
169         mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
170         mmTPC4_CFG_ARUSER,
171         mmTPC4_CFG_AWUSER,
172         mmTPC5_QM_GLBL_SECURE_PROPS,
173         mmTPC5_QM_GLBL_NON_SECURE_PROPS,
174         mmTPC5_CMDQ_GLBL_SECURE_PROPS,
175         mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
176         mmTPC5_CFG_ARUSER,
177         mmTPC5_CFG_AWUSER,
178         mmTPC6_QM_GLBL_SECURE_PROPS,
179         mmTPC6_QM_GLBL_NON_SECURE_PROPS,
180         mmTPC6_CMDQ_GLBL_SECURE_PROPS,
181         mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
182         mmTPC6_CFG_ARUSER,
183         mmTPC6_CFG_AWUSER,
184         mmTPC7_QM_GLBL_SECURE_PROPS,
185         mmTPC7_QM_GLBL_NON_SECURE_PROPS,
186         mmTPC7_CMDQ_GLBL_SECURE_PROPS,
187         mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
188         mmTPC7_CFG_ARUSER,
189         mmTPC7_CFG_AWUSER,
190         mmMME_QM_GLBL_SECURE_PROPS,
191         mmMME_QM_GLBL_NON_SECURE_PROPS,
192         mmMME_CMDQ_GLBL_SECURE_PROPS,
193         mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
194         mmMME_SBA_CONTROL_DATA,
195         mmMME_SBB_CONTROL_DATA,
196         mmMME_SBC_CONTROL_DATA,
197         mmMME_WBC_CONTROL_DATA,
198         mmPCIE_WRAP_PSOC_ARUSER,
199         mmPCIE_WRAP_PSOC_AWUSER
200 };
201
202 static u32 goya_all_events[] = {
203         GOYA_ASYNC_EVENT_ID_PCIE_IF,
204         GOYA_ASYNC_EVENT_ID_TPC0_ECC,
205         GOYA_ASYNC_EVENT_ID_TPC1_ECC,
206         GOYA_ASYNC_EVENT_ID_TPC2_ECC,
207         GOYA_ASYNC_EVENT_ID_TPC3_ECC,
208         GOYA_ASYNC_EVENT_ID_TPC4_ECC,
209         GOYA_ASYNC_EVENT_ID_TPC5_ECC,
210         GOYA_ASYNC_EVENT_ID_TPC6_ECC,
211         GOYA_ASYNC_EVENT_ID_TPC7_ECC,
212         GOYA_ASYNC_EVENT_ID_MME_ECC,
213         GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
214         GOYA_ASYNC_EVENT_ID_MMU_ECC,
215         GOYA_ASYNC_EVENT_ID_DMA_MACRO,
216         GOYA_ASYNC_EVENT_ID_DMA_ECC,
217         GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
218         GOYA_ASYNC_EVENT_ID_PSOC_MEM,
219         GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
220         GOYA_ASYNC_EVENT_ID_SRAM0,
221         GOYA_ASYNC_EVENT_ID_SRAM1,
222         GOYA_ASYNC_EVENT_ID_SRAM2,
223         GOYA_ASYNC_EVENT_ID_SRAM3,
224         GOYA_ASYNC_EVENT_ID_SRAM4,
225         GOYA_ASYNC_EVENT_ID_SRAM5,
226         GOYA_ASYNC_EVENT_ID_SRAM6,
227         GOYA_ASYNC_EVENT_ID_SRAM7,
228         GOYA_ASYNC_EVENT_ID_SRAM8,
229         GOYA_ASYNC_EVENT_ID_SRAM9,
230         GOYA_ASYNC_EVENT_ID_SRAM10,
231         GOYA_ASYNC_EVENT_ID_SRAM11,
232         GOYA_ASYNC_EVENT_ID_SRAM12,
233         GOYA_ASYNC_EVENT_ID_SRAM13,
234         GOYA_ASYNC_EVENT_ID_SRAM14,
235         GOYA_ASYNC_EVENT_ID_SRAM15,
236         GOYA_ASYNC_EVENT_ID_SRAM16,
237         GOYA_ASYNC_EVENT_ID_SRAM17,
238         GOYA_ASYNC_EVENT_ID_SRAM18,
239         GOYA_ASYNC_EVENT_ID_SRAM19,
240         GOYA_ASYNC_EVENT_ID_SRAM20,
241         GOYA_ASYNC_EVENT_ID_SRAM21,
242         GOYA_ASYNC_EVENT_ID_SRAM22,
243         GOYA_ASYNC_EVENT_ID_SRAM23,
244         GOYA_ASYNC_EVENT_ID_SRAM24,
245         GOYA_ASYNC_EVENT_ID_SRAM25,
246         GOYA_ASYNC_EVENT_ID_SRAM26,
247         GOYA_ASYNC_EVENT_ID_SRAM27,
248         GOYA_ASYNC_EVENT_ID_SRAM28,
249         GOYA_ASYNC_EVENT_ID_SRAM29,
250         GOYA_ASYNC_EVENT_ID_GIC500,
251         GOYA_ASYNC_EVENT_ID_PLL0,
252         GOYA_ASYNC_EVENT_ID_PLL1,
253         GOYA_ASYNC_EVENT_ID_PLL3,
254         GOYA_ASYNC_EVENT_ID_PLL4,
255         GOYA_ASYNC_EVENT_ID_PLL5,
256         GOYA_ASYNC_EVENT_ID_PLL6,
257         GOYA_ASYNC_EVENT_ID_AXI_ECC,
258         GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
259         GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
260         GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
261         GOYA_ASYNC_EVENT_ID_PCIE_DEC,
262         GOYA_ASYNC_EVENT_ID_TPC0_DEC,
263         GOYA_ASYNC_EVENT_ID_TPC1_DEC,
264         GOYA_ASYNC_EVENT_ID_TPC2_DEC,
265         GOYA_ASYNC_EVENT_ID_TPC3_DEC,
266         GOYA_ASYNC_EVENT_ID_TPC4_DEC,
267         GOYA_ASYNC_EVENT_ID_TPC5_DEC,
268         GOYA_ASYNC_EVENT_ID_TPC6_DEC,
269         GOYA_ASYNC_EVENT_ID_TPC7_DEC,
270         GOYA_ASYNC_EVENT_ID_MME_WACS,
271         GOYA_ASYNC_EVENT_ID_MME_WACSD,
272         GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
273         GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
274         GOYA_ASYNC_EVENT_ID_PSOC,
275         GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
276         GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
277         GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
278         GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
279         GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
280         GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
281         GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
282         GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
283         GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
284         GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
285         GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
286         GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
287         GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
288         GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
289         GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
290         GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
291         GOYA_ASYNC_EVENT_ID_TPC0_QM,
292         GOYA_ASYNC_EVENT_ID_TPC1_QM,
293         GOYA_ASYNC_EVENT_ID_TPC2_QM,
294         GOYA_ASYNC_EVENT_ID_TPC3_QM,
295         GOYA_ASYNC_EVENT_ID_TPC4_QM,
296         GOYA_ASYNC_EVENT_ID_TPC5_QM,
297         GOYA_ASYNC_EVENT_ID_TPC6_QM,
298         GOYA_ASYNC_EVENT_ID_TPC7_QM,
299         GOYA_ASYNC_EVENT_ID_MME_QM,
300         GOYA_ASYNC_EVENT_ID_MME_CMDQ,
301         GOYA_ASYNC_EVENT_ID_DMA0_QM,
302         GOYA_ASYNC_EVENT_ID_DMA1_QM,
303         GOYA_ASYNC_EVENT_ID_DMA2_QM,
304         GOYA_ASYNC_EVENT_ID_DMA3_QM,
305         GOYA_ASYNC_EVENT_ID_DMA4_QM,
306         GOYA_ASYNC_EVENT_ID_DMA0_CH,
307         GOYA_ASYNC_EVENT_ID_DMA1_CH,
308         GOYA_ASYNC_EVENT_ID_DMA2_CH,
309         GOYA_ASYNC_EVENT_ID_DMA3_CH,
310         GOYA_ASYNC_EVENT_ID_DMA4_CH,
311         GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
312         GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
313         GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
314         GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
315         GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
316         GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
317         GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
318         GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
319         GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
320         GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
321         GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
322         GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
323         GOYA_ASYNC_EVENT_ID_DMA_BM_CH4
324 };
325
326 static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
327 static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
328 static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
329 static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
330
/*
 * goya_get_fixed_properties - Populate the constant ASIC properties
 *
 * @hdev: pointer to hl_device structure
 *
 * Fills hdev->asic_prop with values that are fixed for this ASIC:
 * queue layout, DRAM/SRAM address ranges, MMU geometry and misc limits.
 * Called very early (before PCI init), so it must not touch the device.
 */
void goya_get_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int i;

	/* Queue map: first the external (DMA) queues... */
	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
		prop->hw_queues_props[i].kmd_only = 0;
	}

	/* ...then the CPU queues, usable only by the kernel driver... */
	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
		prop->hw_queues_props[i].kmd_only = 1;
	}

	/* ...then the internal (TPC/MME) queues... */
	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
			NUMBER_OF_INT_HW_QUEUES; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
		prop->hw_queues_props[i].kmd_only = 0;
	}

	/* ...and any remaining slots are unused on Goya */
	for (; i < HL_MAX_QUEUES; i++)
		prop->hw_queues_props[i].type = QUEUE_TYPE_NA;

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
						SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR;
	/* Palladium simulation is slow, so a smaller page-table area is used */
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;

	prop->va_space_host_start_address = VA_HOST_SPACE_START;
	prop->va_space_host_end_address = VA_HOST_SPACE_END;
	prop->va_space_dram_start_address = VA_DDR_SPACE_START;
	prop->va_space_dram_end_address = VA_DDR_SPACE_END;
	/* The default-page mapping covers the whole DRAM VA range */
	prop->dram_size_for_default_page_mapping =
			prop->va_space_dram_end_address;
	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
	prop->high_pll = PLL_HIGH_DEFAULT;
	prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
	prop->max_power_default = MAX_POWER_DEFAULT;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
}
396
397 /*
398  * goya_pci_bars_map - Map PCI BARS of Goya device
399  *
400  * @hdev: pointer to hl_device structure
401  *
402  * Request PCI regions and map them to kernel virtual addresses.
403  * Returns 0 on success
404  *
405  */
406 static int goya_pci_bars_map(struct hl_device *hdev)
407 {
408         static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"};
409         bool is_wc[3] = {false, false, true};
410         int rc;
411
412         rc = hl_pci_bars_map(hdev, name, is_wc);
413         if (rc)
414                 return rc;
415
416         hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
417                         (CFG_BASE - SRAM_BASE_ADDR);
418
419         return 0;
420 }
421
422 static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
423 {
424         struct goya_device *goya = hdev->asic_specific;
425         u64 old_addr = addr;
426         int rc;
427
428         if ((goya) && (goya->ddr_bar_cur_addr == addr))
429                 return old_addr;
430
431         /* Inbound Region 1 - Bar 4 - Point to DDR */
432         rc = hl_pci_set_dram_bar_base(hdev, 1, 4, addr);
433         if (rc)
434                 return U64_MAX;
435
436         if (goya) {
437                 old_addr = goya->ddr_bar_cur_addr;
438                 goya->ddr_bar_cur_addr = addr;
439         }
440
441         return old_addr;
442 }
443
444 /*
445  * goya_init_iatu - Initialize the iATU unit inside the PCI controller
446  *
447  * @hdev: pointer to hl_device structure
448  *
449  * This is needed in case the firmware doesn't initialize the iATU
450  *
451  */
452 static int goya_init_iatu(struct hl_device *hdev)
453 {
454         return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
455                                 HOST_PHYS_BASE, HOST_PHYS_SIZE);
456 }
457
458 /*
459  * goya_early_init - GOYA early initialization code
460  *
461  * @hdev: pointer to hl_device structure
462  *
463  * Verify PCI bars
464  * Set DMA masks
465  * PCI controller initialization
466  * Map PCI bars
467  *
468  */
469 static int goya_early_init(struct hl_device *hdev)
470 {
471         struct asic_fixed_properties *prop = &hdev->asic_prop;
472         struct pci_dev *pdev = hdev->pdev;
473         u32 val;
474         int rc;
475
476         goya_get_fixed_properties(hdev);
477
478         /* Check BAR sizes */
479         if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
480                 dev_err(hdev->dev,
481                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
482                         SRAM_CFG_BAR_ID,
483                         (unsigned long long) pci_resource_len(pdev,
484                                                         SRAM_CFG_BAR_ID),
485                         CFG_BAR_SIZE);
486                 return -ENODEV;
487         }
488
489         if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
490                 dev_err(hdev->dev,
491                         "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
492                         MSIX_BAR_ID,
493                         (unsigned long long) pci_resource_len(pdev,
494                                                                 MSIX_BAR_ID),
495                         MSIX_BAR_SIZE);
496                 return -ENODEV;
497         }
498
499         prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
500
501         rc = hl_pci_init(hdev, 48);
502         if (rc)
503                 return rc;
504
505         if (!hdev->pldm) {
506                 val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
507                 if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
508                         dev_warn(hdev->dev,
509                                 "PCI strap is not configured correctly, PCI bus errors may occur\n");
510         }
511
512         return 0;
513 }
514
/*
 * goya_early_fini - GOYA early finalization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Unmap PCI bars
 *
 */
static int goya_early_fini(struct hl_device *hdev)
{
	/* Undo hl_pci_init (unmap BARs, release PCI resources) */
	hl_pci_fini(hdev);

	return 0;
}
529
/*
 * goya_mmu_prepare_reg - Install an ASID in an engine's user/props register
 *
 * @hdev: pointer to hl_device structure
 * @reg: address of the register to update
 * @asid: address space ID to program into the register's low bits
 *
 * NOTE(review): 0x7FF is assumed to cover both the ASID field and the
 * MMU-bypass (MMBP) bit, so bypass is cleared and the new ASID set.
 */
static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}
536
537 static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
538 {
539         struct goya_device *goya = hdev->asic_specific;
540
541         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
542                 return;
543
544         if (secure)
545                 WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
546         else
547                 WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);
548
549         RREG32(mmDMA_QM_0_GLBL_PROT);
550 }
551
552 /*
553  * goya_fetch_psoc_frequency - Fetch PSOC frequency values
554  *
555  * @hdev: pointer to hl_device structure
556  *
557  */
558 static void goya_fetch_psoc_frequency(struct hl_device *hdev)
559 {
560         struct asic_fixed_properties *prop = &hdev->asic_prop;
561
562         prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
563         prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
564         prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
565         prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
566 }
567
/*
 * goya_late_init - GOYA late initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Runs once the device CPU is expected to be up: clears the MMU page
 * tables, sets the DRAM default page and device-CPU mappings, brings up
 * and tests the CPU queues, fetches ArmCP info and finally enables PCI
 * access and event interrupts from the device CPU. The order of these
 * steps matters.
 *
 * Returns 0 on success, negative errno on failure.
 */
int goya_late_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	goya_fetch_psoc_frequency(hdev);

	rc = goya_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to clear MMU page tables range %d\n", rc);
		return rc;
	}

	rc = goya_mmu_set_dram_default_page(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to set DRAM default page %d\n", rc);
		return rc;
	}

	rc = goya_mmu_add_mappings_for_device_cpu(hdev);
	if (rc)
		return rc;

	rc = goya_init_cpu_queues(hdev);
	if (rc)
		return rc;

	rc = goya_test_cpu_queue(hdev);
	if (rc)
		return rc;

	rc = goya_armcp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get armcp info %d\n", rc);
		return rc;
	}

	/* Now that we have the DRAM size in ASIC prop, we need to check
	 * its size and configure the DMA_IF DDR wrap protection (which is in
	 * the MMU block) accordingly. The value is the log2 of the DRAM size
	 */
	WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));

	rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to enable PCI access from CPU %d\n", rc);
		return rc;
	}

	/* NOTE(review): writing INTS_REGISTER to the GIC presumably arms
	 * async event interrupts from the device CPU - confirm
	 */
	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_INTS_REGISTER);

	return 0;
}
624
625 /*
626  * goya_late_fini - GOYA late tear-down code
627  *
628  * @hdev: pointer to hl_device structure
629  *
630  * Free sensors allocated structures
631  */
632 void goya_late_fini(struct hl_device *hdev)
633 {
634         const struct hwmon_channel_info **channel_info_arr;
635         int i = 0;
636
637         if (!hdev->hl_chip_info->info)
638                 return;
639
640         channel_info_arr = hdev->hl_chip_info->info;
641
642         while (channel_info_arr[i]) {
643                 kfree(channel_info_arr[i]->config);
644                 kfree(channel_info_arr[i]);
645                 i++;
646         }
647
648         kfree(channel_info_arr);
649
650         hdev->hl_chip_info->info = NULL;
651 }
652
/*
 * goya_sw_init - Goya software initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Allocates the per-device goya structure, the small-allocation DMA pool
 * and the CPU-accessible DMA memory/pool. On any failure, everything
 * acquired so far is released via the goto-cleanup chain below.
 */
static int goya_sw_init(struct hl_device *hdev)
{
	struct goya_device *goya;
	int rc;

	/* Allocate device structure */
	goya = kzalloc(sizeof(*goya), GFP_KERNEL);
	if (!goya)
		return -ENOMEM;

	/* according to goya_init_iatu */
	goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;

	/* Start all engine clocks at the low PLL frequency */
	goya->mme_clk = GOYA_PLL_FREQ_LOW;
	goya->tpc_clk = GOYA_PLL_FREQ_LOW;
	goya->ic_clk = GOYA_PLL_FREQ_LOW;

	hdev->asic_specific = goya;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_goya_device;
	}

	/* Coherent, zeroed memory region shared with the device CPU */
	hdev->cpu_accessible_dma_mem =
			hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
					HL_CPU_ACCESSIBLE_MEM_SIZE,
					&hdev->cpu_accessible_dma_address,
					GFP_KERNEL | __GFP_ZERO);

	if (!hdev->cpu_accessible_dma_mem) {
		rc = -ENOMEM;
		goto free_dma_pool;
	}

	dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
		&hdev->cpu_accessible_dma_address);

	/* Sub-allocator over the CPU-accessible region (32-byte granules) */
	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	spin_lock_init(&goya->hw_queues_lock);

	return 0;

	/* Error unwind - release resources in reverse acquisition order */
free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_goya_device:
	kfree(goya);

	return rc;
}
737
/*
 * goya_sw_fini - Goya software tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 * Releases everything allocated by goya_sw_init, in reverse order.
 */
static int goya_sw_fini(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	/* Destroy the sub-allocator before freeing its backing memory */
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	hdev->asic_funcs->asic_dma_free_coherent(hdev,
			HL_CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(goya);

	return 0;
}
761
/*
 * goya_init_dma_qman - Initialize the H/W registers of one DMA QMAN
 *
 * @hdev: pointer to hl_device structure
 * @dma_id: index of the DMA channel whose QMAN is configured
 * @bus_address: DMA (bus) address of the queue's PQ buffer
 *
 * Programs the PQ base/size, the message base addresses (sync manager
 * monitor payload and sync objects), the error-reporting (GIC) address
 * and the trust level, then enables the QMAN.
 */
static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
		dma_addr_t bus_address)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	/* All DMA QMAN register blocks share a layout at a fixed stride */
	u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
	WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));

	WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
	WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);

	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);

	/* PQ has buffer of 2 cache lines, while CQ has 8 lines */
	WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
	WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);

	/* With MMU enabled the DMA QMANs are only partly trusted - see the
	 * security scheme description at the top of this file
	 */
	if (goya->hw_cap_initialized & HW_CAP_MMU)
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);

	WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, QMAN_DMA_ERR_MSG_EN);
	WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
}
809
810 static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
811 {
812         u32 gic_base_lo, gic_base_hi;
813         u64 sob_addr;
814         u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1);
815
816         gic_base_lo =
817                 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
818         gic_base_hi =
819                 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
820
821         WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo);
822         WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi);
823         WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off,
824                         GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id);
825
826         if (dma_id)
827                 sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
828                                 (dma_id - 1) * 4;
829         else
830                 sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
831
832         WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off, upper_32_bits(sob_addr));
833         WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
834 }
835
836 /*
837  * goya_init_dma_qmans - Initialize QMAN DMA registers
838  *
839  * @hdev: pointer to hl_device structure
840  *
841  * Initialize the H/W registers of the QMAN DMA channels
842  *
843  */
844 void goya_init_dma_qmans(struct hl_device *hdev)
845 {
846         struct goya_device *goya = hdev->asic_specific;
847         struct hl_hw_queue *q;
848         int i;
849
850         if (goya->hw_cap_initialized & HW_CAP_DMA)
851                 return;
852
853         q = &hdev->kernel_queues[0];
854
855         for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
856                 goya_init_dma_qman(hdev, i, q->bus_address);
857                 goya_init_dma_ch(hdev, i);
858         }
859
860         goya->hw_cap_initialized |= HW_CAP_DMA;
861 }
862
863 /*
864  * goya_disable_external_queues - Disable external queues
865  *
866  * @hdev: pointer to hl_device structure
867  *
868  */
static void goya_disable_external_queues(struct hl_device *hdev)
{
	/* Clearing GLBL_CFG0 disables each DMA QMAN; this is the counterpart
	 * of goya_init_dma_qman() writing QMAN_DMA_ENABLE to the same
	 * register. All five external (host-facing) queues are disabled.
	 */
	WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
}
877
/*
 * goya_stop_queue - request a QMAN CP stop and wait for it to take effect
 *
 * @hdev: pointer to hl_device structure
 * @cfg_reg: the QMAN's GLBL_CFG1 register (CP stop request)
 * @cp_sts_reg: the QMAN's CP_STS register (fence-in-progress indication)
 * @glbl_sts0_reg: the QMAN's GLBL_STS0 register (CP-is-stopped indication)
 *
 * Returns 0 on success (including the stuck-in-fence case), -EINVAL on
 * timeout waiting for the stop indication.
 */
static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
				u32 cp_sts_reg, u32 glbl_sts0_reg)
{
	int rc;
	u32 status;

	/* use the values of TPC0 as they are all the same*/

	/* Request the CP to stop */
	WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	/* If the CP is currently blocked on a fence, wait for the fence to
	 * clear first; the stop indication cannot arrive while it is stuck.
	 */
	status = RREG32(cp_sts_reg);
	if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
		rc = hl_poll_timeout(
			hdev,
			cp_sts_reg,
			status,
			!(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
			1000,
			QMAN_FENCE_TIMEOUT_USEC);

		/* if QMAN is stuck in fence no need to check for stop */
		if (rc)
			return 0;
	}

	/* Poll until the QMAN reports its CP has actually stopped */
	rc = hl_poll_timeout(
		hdev,
		glbl_sts0_reg,
		status,
		(status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
		1000,
		QMAN_STOP_TIMEOUT_USEC);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for QMAN to stop\n");
		return -EINVAL;
	}

	return 0;
}
919
920 /*
921  * goya_stop_external_queues - Stop external queues
922  *
923  * @hdev: pointer to hl_device structure
924  *
925  * Returns 0 on success
926  *
927  */
928 static int goya_stop_external_queues(struct hl_device *hdev)
929 {
930         int rc, retval = 0;
931
932         rc = goya_stop_queue(hdev,
933                         mmDMA_QM_0_GLBL_CFG1,
934                         mmDMA_QM_0_CP_STS,
935                         mmDMA_QM_0_GLBL_STS0);
936
937         if (rc) {
938                 dev_err(hdev->dev, "failed to stop DMA QMAN 0\n");
939                 retval = -EIO;
940         }
941
942         rc = goya_stop_queue(hdev,
943                         mmDMA_QM_1_GLBL_CFG1,
944                         mmDMA_QM_1_CP_STS,
945                         mmDMA_QM_1_GLBL_STS0);
946
947         if (rc) {
948                 dev_err(hdev->dev, "failed to stop DMA QMAN 1\n");
949                 retval = -EIO;
950         }
951
952         rc = goya_stop_queue(hdev,
953                         mmDMA_QM_2_GLBL_CFG1,
954                         mmDMA_QM_2_CP_STS,
955                         mmDMA_QM_2_GLBL_STS0);
956
957         if (rc) {
958                 dev_err(hdev->dev, "failed to stop DMA QMAN 2\n");
959                 retval = -EIO;
960         }
961
962         rc = goya_stop_queue(hdev,
963                         mmDMA_QM_3_GLBL_CFG1,
964                         mmDMA_QM_3_CP_STS,
965                         mmDMA_QM_3_GLBL_STS0);
966
967         if (rc) {
968                 dev_err(hdev->dev, "failed to stop DMA QMAN 3\n");
969                 retval = -EIO;
970         }
971
972         rc = goya_stop_queue(hdev,
973                         mmDMA_QM_4_GLBL_CFG1,
974                         mmDMA_QM_4_CP_STS,
975                         mmDMA_QM_4_GLBL_STS0);
976
977         if (rc) {
978                 dev_err(hdev->dev, "failed to stop DMA QMAN 4\n");
979                 retval = -EIO;
980         }
981
982         return retval;
983 }
984
985 /*
986  * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
987  *
988  * @hdev: pointer to hl_device structure
989  *
990  * Returns 0 on success
991  *
992  */
int goya_init_cpu_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_eq *eq;
	u32 status;
	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
	int err;

	/* CPU queues may be disabled (e.g. on platforms without device CPU) */
	if (!hdev->cpu_queues_enable)
		return 0;

	/* Idempotent: skip if the CPU queues handshake already completed */
	if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	/* Publish the CPU PQ and EQ DMA addresses to the device CPU through
	 * the PSOC scratchpad registers (the agreed mailbox between the
	 * driver and the device CPU F/W).
	 */
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_0,
			lower_32_bits(cpu_pq->bus_address));
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_1,
			upper_32_bits(cpu_pq->bus_address));

	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_2, lower_32_bits(eq->bus_address));
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_3, upper_32_bits(eq->bus_address));

	/* Device-virtual address of the CPU-accessible memory region */
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_8,
			lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_9,
			upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));

	/* Queue/region sizes the device CPU needs to know */
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_5, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_4, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_10, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	/* Signal the device CPU that the parameters are in place ... */
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_7, PQ_INIT_STATUS_READY_FOR_CP);

	/* ... and kick it via the PI-update interrupt */
	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_PI_UPDATE);

	/* Wait for the device CPU to acknowledge by flipping SCRATCHPAD_7
	 * to READY_FOR_HOST
	 */
	err = hl_poll_timeout(
		hdev,
		mmPSOC_GLOBAL_CONF_SCRATCHPAD_7,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		GOYA_CPU_TIMEOUT_USEC);

	if (err) {
		dev_err(hdev->dev,
			"Failed to setup communication with device CPU\n");
		return -EIO;
	}

	goya->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}
1053
/*
 * goya_set_pll_refclk - set all PLL divider-select registers to 0
 *
 * @hdev: pointer to hl_device structure
 *
 * Writes 0 to the four DIV_SEL registers of every PLL (CPU, IC, MC,
 * PSOC MME/PCI/EMMC, TPC). NOTE(review): presumably this switches the
 * PLLs to their reference clock, as the function name suggests — the
 * exact DIV_SEL encoding is not visible here; confirm against the
 * register spec.
 */
static void goya_set_pll_refclk(struct hl_device *hdev)
{
	WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);

	WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
}
1091
/*
 * goya_disable_clk_rlx - disable clock relaxation on the MME and IC PLLs
 *
 * @hdev: pointer to hl_device structure
 *
 * NOTE(review): the meaning of the 0x100010 value is not derivable from
 * this file; confirm against the PLL CLK_RLX register spec.
 */
static void goya_disable_clk_rlx(struct hl_device *hdev)
{
	WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
	WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
}
1097
/*
 * _goya_tpc_mbist_workaround - run the MBIST workaround for one TPC
 *
 * @hdev: pointer to hl_device structure
 * @tpc_id: index of the TPC to initialize
 *
 * Workaround for bug H2 #2443 ("TPC SB is not initialized on chip reset"):
 * run MBIST on the TPC memories, reset the TPC core, then zero the first
 * 256 words of its SLM.
 */
static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
{
	u64 tpc_eml_address;
	u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
	int err, slm_index;

	/* Per-TPC register strides: 0x40000 for CFG, 0x200000 for EML;
	 * the SLM window sits 0x100000 above the EML block.
	 */
	tpc_offset = tpc_id * 0x40000;
	tpc_eml_offset = tpc_id * 0x200000;
	tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
	tpc_slm_offset = tpc_eml_address + 0x100000;

	/*
	 * Workaround for Bug H2 #2443 :
	 * "TPC SB is not initialized on chip reset"
	 */

	val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
	if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
		dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
			tpc_id);

	WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);

	/* Per-memory MBIST configuration values.
	 * NOTE(review): constants taken as-is from the H/W team; their
	 * encoding is not derivable from this file.
	 */
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);

	/* Kick off MBIST and wait for the DONE indication */
	WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
		1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);

	err = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
		val,
		(val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
		1000,
		HL_DEVICE_TIMEOUT_USEC);

	if (err)
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);

	/* Pulse the TPC core reset: assert, wait, de-assert, wait */
	WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
		1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);

	msleep(GOYA_RESET_WAIT_MSEC);

	WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
		~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));

	msleep(GOYA_RESET_WAIT_MSEC);

	/* Zero the first 256 32-bit words of the TPC SLM */
	for (slm_index = 0 ; slm_index < 256 ; slm_index++)
		WREG32(tpc_slm_offset + (slm_index << 2), 0);

	/* Read back to make sure the writes have been flushed */
	val = RREG32(tpc_slm_offset);
}
1162
1163 static void goya_tpc_mbist_workaround(struct hl_device *hdev)
1164 {
1165         struct goya_device *goya = hdev->asic_specific;
1166         int i;
1167
1168         if (hdev->pldm)
1169                 return;
1170
1171         if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST)
1172                 return;
1173
1174         /* Workaround for H2 #2443 */
1175
1176         for (i = 0 ; i < TPC_MAX_NUM ; i++)
1177                 _goya_tpc_mbist_workaround(hdev, i);
1178
1179         goya->hw_cap_initialized |= HW_CAP_TPC_MBIST;
1180 }
1181
1182 /*
1183  * goya_init_golden_registers - Initialize golden registers
1184  *
1185  * @hdev: pointer to hl_device structure
1186  *
1187  * Initialize the H/W registers of the device
1188  *
1189  */
static void goya_init_golden_registers(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 polynom[10], tpc_intr_mask, offset;
	int i;

	/* Idempotent: golden registers are programmed only once */
	if (goya->hw_cap_initialized & HW_CAP_GOLDEN)
		return;

	/* Scrambler/split polynomial coefficients for the routers.
	 * NOTE(review): values provided by the H/W team; their derivation
	 * is not visible in this file.
	 */
	polynom[0] = 0x00020080;
	polynom[1] = 0x00401000;
	polynom[2] = 0x00200800;
	polynom[3] = 0x00002000;
	polynom[4] = 0x00080200;
	polynom[5] = 0x00040100;
	polynom[6] = 0x00100400;
	polynom[7] = 0x00004000;
	polynom[8] = 0x00010000;
	polynom[9] = 0x00008000;

	/* Mask all arithmetic interrupts from TPC */
	tpc_intr_mask = 0x7FFF;

	/* SRAM router arbitration weights, 6 rows (Y0..Y5) x 5 columns
	 * (X0..X4), row stride 0x20000
	 */
	for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) {
		WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204);


		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105);

		WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105);
		WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104);
		WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103);
		WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102);
		WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101);
	}

	/* MME engine limits and error mask */
	WREG32(mmMME_STORE_MAX_CREDIT, 0x21);
	WREG32(mmMME_AGU, 0x0f0f0f10);
	WREG32(mmMME_SEI_MASK, ~0x0);

	/* MME router HBW arbitration weights */
	WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
	WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
	WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
	WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701);
	WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401);
	WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401);
	WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301);
	WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
	WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105);
	WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
	WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
	WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301);
	WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401);
	WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202);
	WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101);
	WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201);
	WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701);
	WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101);
	/* NOTE(review): the line below breaks the MMEn pattern of this
	 * group — MME1 WR_RQ_N_ARB is never written while WR_RQ_S_ARB is
	 * written twice (here and a few lines down). Possibly intentional
	 * golden values; confirm with the H/W team before changing.
	 */
	WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701);
	WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201);
	WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
	WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102);
	WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
	WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
	WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707);
	WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201);
	WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
	WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
	WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102);
	WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102);
	WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102);
	WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102);
	WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102);
	WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107);
	WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106);
	WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102);
	WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102);
	WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102);
	WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102);
	WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102);
	WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702);
	WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702);
	WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602);
	WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402);
	WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202);
	WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102);
	WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401);
	WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
	WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
	WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
	WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
	WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107);
	WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107);
	WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
	WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
	WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501);
	WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501);
	WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301);
	WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401);
	WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101);
	WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101);
	WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101);
	WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101);

	/* TPC router HBW arbitration weights, per TPC (TPC1..TPC6 have
	 * their own routers; TPC0/TPC7 use NRTRs configured below)
	 */
	WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202);
	WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201);
	WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201);
	WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202);
	WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
	WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101);

	WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
	WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101);
	WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101);
	WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202);
	WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201);
	WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201);
	WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202);
	WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
	WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
	WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101);

	WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
	WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
	WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101);
	WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301);
	WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201);
	WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201);
	WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402);
	WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
	WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
	WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401);

	WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
	WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702);
	WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201);
	WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201);
	WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602);
	WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
	WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
	WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301);

	WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101);
	WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101);
	WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101);
	WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602);
	WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201);
	WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201);
	WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702);
	WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
	WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501);

	WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702);
	WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702);
	WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
	WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101);
	WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501);

	/* Program the 10 split coefficients (shifted right by 7) into every
	 * router/NRTR; coefficient registers are 4 bytes apart.
	 */
	for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) {
		WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);

		WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);

		WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
		WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
	}

	/* Enable scrambling on the 6 MME routers (engine stride 0x40000) */
	for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) {
		WREG32(mmMME1_RTR_SCRAMB_EN + offset,
				1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT);
		WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset,
				1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT);
	}

	/* Per-TPC: mask interrupts and enable NRTR scrambling */
	for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) {
		/*
		 * Workaround for Bug H2 #2441 :
		 * "ST.NOP set trace event illegal opcode"
		 */
		WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask);

		WREG32(mmTPC0_NRTR_SCRAMB_EN + offset,
				1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
		WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
				1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
	}

	/* Enable scrambling on the DMA and PCI NRTRs as well */
	WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
	WREG32(mmDMA_NRTR_NON_LIN_SCRAMB,
			1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT);

	WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT);
	WREG32(mmPCI_NRTR_NON_LIN_SCRAMB,
			1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT);

	/*
	 * Workaround for H2 #HW-23 bug
	 * Set DMA max outstanding read requests to 240 on DMA CH 1.
	 * This limitation is still large enough to not affect Gen4 bandwidth.
	 * We need to only limit that DMA channel because the user can only read
	 * from Host using DMA CH 1
	 */
	WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);

	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);

	goya->hw_cap_initialized |= HW_CAP_GOLDEN;
}
1481
/*
 * goya_init_mme_qman - initialize the MME QMAN H/W registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Programs the MME QMAN's PQ (located in SRAM at MME_QMAN_BASE_OFFSET),
 * LDMA offsets, sync-manager message bases, error reporting via the GIC,
 * protection bits and finally enables the QMAN.
 */
static void goya_init_mme_qman(struct hl_device *hdev)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u64 qman_base_addr;

	/* Sync-manager monitor payload and sync-object bases */
	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	/* Errors are reported by writing the event ID to the GIC */
	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	/* The MME PQ lives in device SRAM, not host memory */
	qman_base_addr = hdev->asic_prop.sram_base_address +
				MME_QMAN_BASE_OFFSET;

	WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
	WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
	WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
	WREG32(mmMME_QM_PQ_PI, 0);
	WREG32(mmMME_QM_PQ_CI, 0);
	/* LDMA register offsets used by the CP to program DMA descriptors */
	WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
	WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
	WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
	WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);

	WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
	WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
	WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
	WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);

	/* QMAN CQ has 8 cache lines */
	WREG32(mmMME_QM_CQ_CFG1, 0x00080008);

	WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
	WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);

	WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);

	WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);

	WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);

	WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
}
1531
1532 static void goya_init_mme_cmdq(struct hl_device *hdev)
1533 {
1534         u32 mtr_base_lo, mtr_base_hi;
1535         u32 so_base_lo, so_base_hi;
1536         u32 gic_base_lo, gic_base_hi;
1537         u64 qman_base_addr;
1538
1539         mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1540         mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1541         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1542         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1543
1544         gic_base_lo =
1545                 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1546         gic_base_hi =
1547                 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1548
1549         qman_base_addr = hdev->asic_prop.sram_base_address +
1550                                 MME_QMAN_BASE_OFFSET;
1551
1552         WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1553         WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1554         WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1555         WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1556
1557         /* CMDQ CQ has 20 cache lines */
1558         WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);
1559
1560         WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
1561         WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);
1562
1563         WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);
1564
1565         WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);
1566
1567         WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);
1568
1569         WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
1570 }
1571
1572 void goya_init_mme_qmans(struct hl_device *hdev)
1573 {
1574         struct goya_device *goya = hdev->asic_specific;
1575         u32 so_base_lo, so_base_hi;
1576
1577         if (goya->hw_cap_initialized & HW_CAP_MME)
1578                 return;
1579
1580         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1581         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1582
1583         WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
1584         WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);
1585
1586         goya_init_mme_qman(hdev);
1587         goya_init_mme_cmdq(hdev);
1588
1589         goya->hw_cap_initialized |= HW_CAP_MME;
1590 }
1591
/*
 * goya_init_tpc_qman - Configure and enable one TPC QMAN
 *
 * @hdev: pointer to hl_device structure
 * @base_off: offset of this QMAN's PQ inside the SRAM
 * @tpc_id: index of the TPC engine (0..7); selects the register block
 *
 * Programs the PQ base/size/pointers, the CP LDMA register offsets, the CP
 * message base addresses, and the error-reporting path, then enables the
 * QMAN. Register writes are kept in this exact order; the enable write is
 * deliberately last.
 */
static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u64 qman_base_addr;
	/* Per-TPC register stride, derived from two adjacent TPC blocks */
	u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);

	/* Split 64-bit CFG-space addresses into lo/hi register halves */
	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	/* This QMAN's PQ lives in SRAM at the caller-supplied offset */
	qman_base_addr = hdev->asic_prop.sram_base_address + base_off;

	WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
	WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
	WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
	WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
	WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
	/* Register offsets the CP uses for its internal LDMA engine -
	 * presumably offsets within the QMAN block; TODO confirm vs spec
	 */
	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
	WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
	WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);

	/* CP message bases: MSG_BASE0 -> monitors, MSG_BASE1 -> sync objects */
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);

	/* QMAN CQ has 8 cache lines */
	WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);

	/* On error, write this TPC's event ID to the GIC SETSPI doorbell */
	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);

	WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);

	WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);

	WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);

	/* Enable last, after everything else is configured */
	WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
}
1641
/*
 * goya_init_tpc_cmdq - Configure and enable one TPC CMDQ
 *
 * @hdev: pointer to hl_device structure
 * @tpc_id: index of the TPC engine (0..7); selects the register block
 *
 * Same flow as goya_init_tpc_qman() minus the PQ programming - the CMDQ
 * has no PQ of its own. The enable write is deliberately last.
 */
static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
{
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	/* Per-TPC register stride, derived from two adjacent TPC blocks */
	u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);

	/* Split 64-bit CFG-space addresses into lo/hi register halves */
	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	/* CP message bases: MSG_BASE0 -> monitors, MSG_BASE1 -> sync objects */
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);

	/* CMDQ CQ has 20 cache lines */
	WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);

	/* On error, write this TPC's event ID to the GIC SETSPI doorbell */
	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);

	WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);

	WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);

	WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);

	/* Enable last, after everything else is configured */
	WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
}
1678
1679 void goya_init_tpc_qmans(struct hl_device *hdev)
1680 {
1681         struct goya_device *goya = hdev->asic_specific;
1682         u32 so_base_lo, so_base_hi;
1683         u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW -
1684                         mmTPC0_CFG_SM_BASE_ADDRESS_LOW;
1685         int i;
1686
1687         if (goya->hw_cap_initialized & HW_CAP_TPC)
1688                 return;
1689
1690         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1691         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1692
1693         for (i = 0 ; i < TPC_MAX_NUM ; i++) {
1694                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off,
1695                                 so_base_lo);
1696                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off,
1697                                 so_base_hi);
1698         }
1699
1700         goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0);
1701         goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1);
1702         goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2);
1703         goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3);
1704         goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4);
1705         goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5);
1706         goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6);
1707         goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7);
1708
1709         for (i = 0 ; i < TPC_MAX_NUM ; i++)
1710                 goya_init_tpc_cmdq(hdev, i);
1711
1712         goya->hw_cap_initialized |= HW_CAP_TPC;
1713 }
1714
1715 /*
1716  * goya_disable_internal_queues - Disable internal queues
1717  *
1718  * @hdev: pointer to hl_device structure
1719  *
1720  */
1721 static void goya_disable_internal_queues(struct hl_device *hdev)
1722 {
1723         WREG32(mmMME_QM_GLBL_CFG0, 0);
1724         WREG32(mmMME_CMDQ_GLBL_CFG0, 0);
1725
1726         WREG32(mmTPC0_QM_GLBL_CFG0, 0);
1727         WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);
1728
1729         WREG32(mmTPC1_QM_GLBL_CFG0, 0);
1730         WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0);
1731
1732         WREG32(mmTPC2_QM_GLBL_CFG0, 0);
1733         WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0);
1734
1735         WREG32(mmTPC3_QM_GLBL_CFG0, 0);
1736         WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0);
1737
1738         WREG32(mmTPC4_QM_GLBL_CFG0, 0);
1739         WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0);
1740
1741         WREG32(mmTPC5_QM_GLBL_CFG0, 0);
1742         WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0);
1743
1744         WREG32(mmTPC6_QM_GLBL_CFG0, 0);
1745         WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0);
1746
1747         WREG32(mmTPC7_QM_GLBL_CFG0, 0);
1748         WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0);
1749 }
1750
1751 /*
1752  * goya_stop_internal_queues - Stop internal queues
1753  *
1754  * @hdev: pointer to hl_device structure
1755  *
1756  * Returns 0 on success
1757  *
1758  */
1759 static int goya_stop_internal_queues(struct hl_device *hdev)
1760 {
1761         int rc, retval = 0;
1762
1763         /*
1764          * Each queue (QMAN) is a separate H/W logic. That means that each
1765          * QMAN can be stopped independently and failure to stop one does NOT
1766          * mandate we should not try to stop other QMANs
1767          */
1768
1769         rc = goya_stop_queue(hdev,
1770                         mmMME_QM_GLBL_CFG1,
1771                         mmMME_QM_CP_STS,
1772                         mmMME_QM_GLBL_STS0);
1773
1774         if (rc) {
1775                 dev_err(hdev->dev, "failed to stop MME QMAN\n");
1776                 retval = -EIO;
1777         }
1778
1779         rc = goya_stop_queue(hdev,
1780                         mmMME_CMDQ_GLBL_CFG1,
1781                         mmMME_CMDQ_CP_STS,
1782                         mmMME_CMDQ_GLBL_STS0);
1783
1784         if (rc) {
1785                 dev_err(hdev->dev, "failed to stop MME CMDQ\n");
1786                 retval = -EIO;
1787         }
1788
1789         rc = goya_stop_queue(hdev,
1790                         mmTPC0_QM_GLBL_CFG1,
1791                         mmTPC0_QM_CP_STS,
1792                         mmTPC0_QM_GLBL_STS0);
1793
1794         if (rc) {
1795                 dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n");
1796                 retval = -EIO;
1797         }
1798
1799         rc = goya_stop_queue(hdev,
1800                         mmTPC0_CMDQ_GLBL_CFG1,
1801                         mmTPC0_CMDQ_CP_STS,
1802                         mmTPC0_CMDQ_GLBL_STS0);
1803
1804         if (rc) {
1805                 dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n");
1806                 retval = -EIO;
1807         }
1808
1809         rc = goya_stop_queue(hdev,
1810                         mmTPC1_QM_GLBL_CFG1,
1811                         mmTPC1_QM_CP_STS,
1812                         mmTPC1_QM_GLBL_STS0);
1813
1814         if (rc) {
1815                 dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n");
1816                 retval = -EIO;
1817         }
1818
1819         rc = goya_stop_queue(hdev,
1820                         mmTPC1_CMDQ_GLBL_CFG1,
1821                         mmTPC1_CMDQ_CP_STS,
1822                         mmTPC1_CMDQ_GLBL_STS0);
1823
1824         if (rc) {
1825                 dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n");
1826                 retval = -EIO;
1827         }
1828
1829         rc = goya_stop_queue(hdev,
1830                         mmTPC2_QM_GLBL_CFG1,
1831                         mmTPC2_QM_CP_STS,
1832                         mmTPC2_QM_GLBL_STS0);
1833
1834         if (rc) {
1835                 dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n");
1836                 retval = -EIO;
1837         }
1838
1839         rc = goya_stop_queue(hdev,
1840                         mmTPC2_CMDQ_GLBL_CFG1,
1841                         mmTPC2_CMDQ_CP_STS,
1842                         mmTPC2_CMDQ_GLBL_STS0);
1843
1844         if (rc) {
1845                 dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n");
1846                 retval = -EIO;
1847         }
1848
1849         rc = goya_stop_queue(hdev,
1850                         mmTPC3_QM_GLBL_CFG1,
1851                         mmTPC3_QM_CP_STS,
1852                         mmTPC3_QM_GLBL_STS0);
1853
1854         if (rc) {
1855                 dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n");
1856                 retval = -EIO;
1857         }
1858
1859         rc = goya_stop_queue(hdev,
1860                         mmTPC3_CMDQ_GLBL_CFG1,
1861                         mmTPC3_CMDQ_CP_STS,
1862                         mmTPC3_CMDQ_GLBL_STS0);
1863
1864         if (rc) {
1865                 dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n");
1866                 retval = -EIO;
1867         }
1868
1869         rc = goya_stop_queue(hdev,
1870                         mmTPC4_QM_GLBL_CFG1,
1871                         mmTPC4_QM_CP_STS,
1872                         mmTPC4_QM_GLBL_STS0);
1873
1874         if (rc) {
1875                 dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n");
1876                 retval = -EIO;
1877         }
1878
1879         rc = goya_stop_queue(hdev,
1880                         mmTPC4_CMDQ_GLBL_CFG1,
1881                         mmTPC4_CMDQ_CP_STS,
1882                         mmTPC4_CMDQ_GLBL_STS0);
1883
1884         if (rc) {
1885                 dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n");
1886                 retval = -EIO;
1887         }
1888
1889         rc = goya_stop_queue(hdev,
1890                         mmTPC5_QM_GLBL_CFG1,
1891                         mmTPC5_QM_CP_STS,
1892                         mmTPC5_QM_GLBL_STS0);
1893
1894         if (rc) {
1895                 dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n");
1896                 retval = -EIO;
1897         }
1898
1899         rc = goya_stop_queue(hdev,
1900                         mmTPC5_CMDQ_GLBL_CFG1,
1901                         mmTPC5_CMDQ_CP_STS,
1902                         mmTPC5_CMDQ_GLBL_STS0);
1903
1904         if (rc) {
1905                 dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n");
1906                 retval = -EIO;
1907         }
1908
1909         rc = goya_stop_queue(hdev,
1910                         mmTPC6_QM_GLBL_CFG1,
1911                         mmTPC6_QM_CP_STS,
1912                         mmTPC6_QM_GLBL_STS0);
1913
1914         if (rc) {
1915                 dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n");
1916                 retval = -EIO;
1917         }
1918
1919         rc = goya_stop_queue(hdev,
1920                         mmTPC6_CMDQ_GLBL_CFG1,
1921                         mmTPC6_CMDQ_CP_STS,
1922                         mmTPC6_CMDQ_GLBL_STS0);
1923
1924         if (rc) {
1925                 dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n");
1926                 retval = -EIO;
1927         }
1928
1929         rc = goya_stop_queue(hdev,
1930                         mmTPC7_QM_GLBL_CFG1,
1931                         mmTPC7_QM_CP_STS,
1932                         mmTPC7_QM_GLBL_STS0);
1933
1934         if (rc) {
1935                 dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n");
1936                 retval = -EIO;
1937         }
1938
1939         rc = goya_stop_queue(hdev,
1940                         mmTPC7_CMDQ_GLBL_CFG1,
1941                         mmTPC7_CMDQ_CP_STS,
1942                         mmTPC7_CMDQ_GLBL_STS0);
1943
1944         if (rc) {
1945                 dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n");
1946                 retval = -EIO;
1947         }
1948
1949         return retval;
1950 }
1951
/*
 * goya_dma_stall - Stall all five DMA channels
 *
 * @hdev: pointer to hl_device structure
 *
 * Sets the DMA_STOP bit in each DMA QMAN's GLBL_CFG1 register.
 * Fire-and-forget: no polling for the stall to take effect here;
 * callers wait (see goya_halt_engines()).
 */
static void goya_dma_stall(struct hl_device *hdev)
{
	WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
	WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
}
1960
/*
 * goya_tpc_stall - Stall all eight TPC engines
 *
 * @hdev: pointer to hl_device structure
 *
 * Sets the stall valid bit in each TPC's CFG_TPC_STALL register.
 * Fire-and-forget: callers are responsible for waiting.
 */
static void goya_tpc_stall(struct hl_device *hdev)
{
	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
}
1972
/*
 * goya_mme_stall - Stall the MME engine
 *
 * @hdev: pointer to hl_device structure
 *
 * Writes all-ones to the MME stall register; fire-and-forget.
 */
static void goya_mme_stall(struct hl_device *hdev)
{
	WREG32(mmMME_STALL, 0xFFFFFFFF);
}
1977
/*
 * goya_enable_msix - Allocate MSI-X vectors and hook up IRQ handlers
 *
 * @hdev: pointer to hl_device structure
 *
 * Allocates exactly GOYA_MSIX_ENTRIES MSI-X vectors, registers a completion
 * queue handler per CQ and one event-queue handler, then marks HW_CAP_MSIX.
 * Idempotent via the capability flag.
 *
 * On failure, unwinds only the CQ IRQs that were successfully requested
 * (irq_cnt_init tracks how many; the failed request itself is not freed)
 * and releases the vectors.
 *
 * Returns 0 on success, negative errno otherwise.
 */
static int goya_enable_msix(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int cq_cnt = hdev->asic_prop.completion_queues_count;
	int rc, i, irq_cnt_init, irq;

	if (goya->hw_cap_initialized & HW_CAP_MSIX)
		return 0;

	/* min == max: we need all GOYA_MSIX_ENTRIES vectors or none */
	rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
				GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
	if (rc < 0) {
		dev_err(hdev->dev,
			"MSI-X: Failed to enable support -- %d/%d\n",
			GOYA_MSIX_ENTRIES, rc);
		return rc;
	}

	/* One IRQ per completion queue; vector index == CQ index */
	for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
		irq = pci_irq_vector(hdev->pdev, i);
		rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
				&hdev->completion_queue[i]);
		if (rc) {
			dev_err(hdev->dev, "Failed to request IRQ %d", irq);
			goto free_irqs;
		}
	}

	/* Dedicated vector for the event queue */
	irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);

	rc = request_irq(irq, hl_irq_handler_eq, 0,
			goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
			&hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "Failed to request IRQ %d", irq);
		goto free_irqs;
	}

	goya->hw_cap_initialized |= HW_CAP_MSIX;
	return 0;

free_irqs:
	/* Free only the CQ IRQs that were actually requested */
	for (i = 0 ; i < irq_cnt_init ; i++)
		free_irq(pci_irq_vector(hdev->pdev, i),
			&hdev->completion_queue[i]);

	pci_free_irq_vectors(hdev->pdev);
	return rc;
}
2027
2028 static void goya_sync_irqs(struct hl_device *hdev)
2029 {
2030         struct goya_device *goya = hdev->asic_specific;
2031         int i;
2032
2033         if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2034                 return;
2035
2036         /* Wait for all pending IRQs to be finished */
2037         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
2038                 synchronize_irq(pci_irq_vector(hdev->pdev, i));
2039
2040         synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX));
2041 }
2042
/*
 * goya_disable_msix - Tear down MSI-X: free all IRQ handlers and vectors
 *
 * @hdev: pointer to hl_device structure
 *
 * Order matters: first wait for in-flight handlers (goya_sync_irqs), then
 * free the event-queue IRQ, then the per-CQ IRQs, and finally release the
 * vectors. Clears HW_CAP_MSIX; a no-op when MSI-X was never enabled.
 */
static void goya_disable_msix(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int i, irq;

	if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
		return;

	goya_sync_irqs(hdev);

	irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
	free_irq(irq, &hdev->event_queue);

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
		irq = pci_irq_vector(hdev->pdev, i);
		free_irq(irq, &hdev->completion_queue[i]);
	}

	pci_free_irq_vectors(hdev->pdev);

	goya->hw_cap_initialized &= ~HW_CAP_MSIX;
}
2065
/*
 * goya_halt_engines - Quiesce all compute engines before a reset
 *
 * @hdev: pointer to hl_device structure
 * @hard_reset: true when a full device reset will follow; also stops the
 *              embedded CPU and removes its MMU mappings
 *
 * Sequence: (optionally halt CPU) -> stop queues -> wait -> stall engines
 * -> wait -> disable queues -> tear down / sync IRQs. The waits between
 * steps give in-flight work a chance to drain; their length depends on
 * whether we run on real silicon or on the PLDM emulation platform.
 */
static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
{
	u32 wait_timeout_ms, cpu_timeout_ms;

	dev_info(hdev->dev,
		"Halting compute engines and disabling interrupts\n");

	/* PLDM (emulation) is far slower than silicon - use longer waits */
	if (hdev->pldm) {
		wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
		cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
	} else {
		wait_timeout_ms = GOYA_RESET_WAIT_MSEC;
		cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
	}

	if (hard_reset) {
		/*
		 * I don't know what is the state of the CPU so make sure it is
		 * stopped in any means necessary
		 */
		WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
		msleep(cpu_timeout_ms);
	}

	/* Stop queues first so no new work is fetched... */
	goya_stop_external_queues(hdev);
	goya_stop_internal_queues(hdev);

	msleep(wait_timeout_ms);

	/* ...then stall the engines themselves */
	goya_dma_stall(hdev);
	goya_tpc_stall(hdev);
	goya_mme_stall(hdev);

	msleep(wait_timeout_ms);

	/* Finally disable the queue logic altogether */
	goya_disable_external_queues(hdev);
	goya_disable_internal_queues(hdev);

	if (hard_reset) {
		goya_disable_msix(hdev);
		goya_mmu_remove_device_cpu_mappings(hdev);
	} else {
		goya_sync_irqs(hdev);
	}
}
2113
2114 /*
2115  * goya_push_uboot_to_device() - Push u-boot FW code to device.
2116  * @hdev: Pointer to hl_device structure.
2117  *
2118  * Copy u-boot fw code from firmware file to SRAM BAR.
2119  *
2120  * Return: 0 on success, non-zero for failure.
2121  */
2122 static int goya_push_uboot_to_device(struct hl_device *hdev)
2123 {
2124         char fw_name[200];
2125         void __iomem *dst;
2126
2127         snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-u-boot.bin");
2128         dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + UBOOT_FW_OFFSET;
2129
2130         return hl_fw_push_fw_to_device(hdev, fw_name, dst);
2131 }
2132
2133 /*
2134  * goya_push_linux_to_device() - Push LINUX FW code to device.
2135  * @hdev: Pointer to hl_device structure.
2136  *
2137  * Copy LINUX fw code from firmware file to HBM BAR.
2138  *
2139  * Return: 0 on success, non-zero for failure.
2140  */
2141 static int goya_push_linux_to_device(struct hl_device *hdev)
2142 {
2143         char fw_name[200];
2144         void __iomem *dst;
2145
2146         snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb");
2147         dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
2148
2149         return hl_fw_push_fw_to_device(hdev, fw_name, dst);
2150 }
2151
/*
 * goya_pldm_init_cpu - Boot the embedded CPU on the PLDM emulation platform
 *
 * @hdev: pointer to hl_device structure
 *
 * On PLDM there is no boot-loader running, so the driver must do the work
 * itself: reset the ARM cores and the CA53 macro, push u-boot and the Linux
 * FIT image to the device, program the reset vector to u-boot's SRAM
 * location, and release core 0 from reset.
 *
 * The RREG32() calls whose results are discarded appear to be read-backs
 * that flush/order the preceding posted writes - NOTE(review): confirm
 * against the HW bring-up procedure.
 *
 * Returns 0 on success, negative errno on FW-push failure.
 */
static int goya_pldm_init_cpu(struct hl_device *hdev)
{
	u32 val, unit_rst_val;
	int rc;

	/* Must initialize SRAM scrambler before pushing u-boot to SRAM */
	goya_init_golden_registers(hdev);

	/* Put ARM cores into reset */
	WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL, CPU_RESET_ASSERT);
	val = RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);

	/* Reset the CA53 MACRO */
	unit_rst_val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
	WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, CA53_RESET);
	val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
	WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, unit_rst_val);
	val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);

	rc = goya_push_uboot_to_device(hdev);
	if (rc)
		return rc;

	rc = goya_push_linux_to_device(hdev);
	if (rc)
		return rc;

	/* Tell u-boot the FIT image is already in place; clear boot status */
	WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_FIT_RDY);
	WREG32(mmPSOC_GLOBAL_CONF_WARM_REBOOT, CPU_BOOT_STATUS_NA);

	/* Reset vector -> u-boot's location in SRAM */
	WREG32(mmCPU_CA53_CFG_RST_ADDR_LSB_0,
		lower_32_bits(SRAM_BASE_ADDR + UBOOT_FW_OFFSET));
	WREG32(mmCPU_CA53_CFG_RST_ADDR_MSB_0,
		upper_32_bits(SRAM_BASE_ADDR + UBOOT_FW_OFFSET));

	/* Release ARM core 0 from reset */
	WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL,
					CPU_RESET_CORE0_DEASSERT);
	val = RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);

	return 0;
}
2194
2195 /*
2196  * FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
2197  * The version string should be located by that offset.
2198  */
2199 static void goya_read_device_fw_version(struct hl_device *hdev,
2200                                         enum goya_fw_component fwc)
2201 {
2202         const char *name;
2203         u32 ver_off;
2204         char *dest;
2205
2206         switch (fwc) {
2207         case FW_COMP_UBOOT:
2208                 ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_29);
2209                 dest = hdev->asic_prop.uboot_ver;
2210                 name = "U-Boot";
2211                 break;
2212         case FW_COMP_PREBOOT:
2213                 ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_28);
2214                 dest = hdev->asic_prop.preboot_ver;
2215                 name = "Preboot";
2216                 break;
2217         default:
2218                 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2219                 return;
2220         }
2221
2222         ver_off &= ~((u32)SRAM_BASE_ADDR);
2223
2224         if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2225                 memcpy_fromio(dest, hdev->pcie_bar[SRAM_CFG_BAR_ID] + ver_off,
2226                                                         VERSION_MAX_LEN);
2227         } else {
2228                 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2229                                                                 name, ver_off);
2230                 strcpy(dest, "unavailable");
2231         }
2232 }
2233
/*
 * goya_init_cpu - Boot the embedded CPU and load its firmware
 *
 * @hdev: pointer to hl_device structure
 * @cpu_timeout: timeout (in usec, per hl_poll_timeout usage below) for the
 *               boot-status polls - TODO confirm units against hl_poll_timeout
 *
 * Waits for the on-device boot-loader to report readiness, reads the FW
 * version strings, then (unless FW loading is disabled or Linux is already
 * up) pushes the Linux FIT image and signals u-boot to boot it. On PLDM,
 * the whole boot is driven by the driver via goya_pldm_init_cpu().
 * Idempotent via the HW_CAP_CPU capability flag.
 *
 * Returns 0 on success, -EIO on boot/handshake failure, other negative
 * errno on FW-push failure.
 */
static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 status;
	int rc;

	if (!hdev->cpu_enable)
		return 0;

	if (goya->hw_cap_initialized & HW_CAP_CPU)
		return 0;

	/*
	 * Before pushing u-boot/linux to device, need to set the ddr bar to
	 * base address of dram
	 */
	if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map DDR bar to DRAM base address\n");
		return -EIO;
	}

	if (hdev->pldm) {
		rc = goya_pldm_init_cpu(hdev);
		if (rc)
			return rc;

		goto out;
	}

	/* Make sure CPU boot-loader is running */
	rc = hl_poll_timeout(
		hdev,
		mmPSOC_GLOBAL_CONF_WARM_REBOOT,
		status,
		(status == CPU_BOOT_STATUS_DRAM_RDY) ||
		(status == CPU_BOOT_STATUS_SRAM_AVAIL),
		10000,
		cpu_timeout);

	if (rc) {
		/* Translate the last boot-status code into a readable error */
		dev_err(hdev->dev, "Error in ARM u-boot!");
		switch (status) {
		case CPU_BOOT_STATUS_NA:
			dev_err(hdev->dev,
				"ARM status %d - BTL did NOT run\n", status);
			break;
		case CPU_BOOT_STATUS_IN_WFE:
			dev_err(hdev->dev,
				"ARM status %d - Inside WFE loop\n", status);
			break;
		case CPU_BOOT_STATUS_IN_BTL:
			dev_err(hdev->dev,
				"ARM status %d - Stuck in BTL\n", status);
			break;
		case CPU_BOOT_STATUS_IN_PREBOOT:
			dev_err(hdev->dev,
				"ARM status %d - Stuck in Preboot\n", status);
			break;
		case CPU_BOOT_STATUS_IN_SPL:
			dev_err(hdev->dev,
				"ARM status %d - Stuck in SPL\n", status);
			break;
		case CPU_BOOT_STATUS_IN_UBOOT:
			dev_err(hdev->dev,
				"ARM status %d - Stuck in u-boot\n", status);
			break;
		case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
			dev_err(hdev->dev,
				"ARM status %d - DDR initialization failed\n",
				status);
			break;
		case CPU_BOOT_STATUS_UBOOT_NOT_READY:
			dev_err(hdev->dev,
				"ARM status %d - u-boot stopped by user\n",
				status);
			break;
		default:
			dev_err(hdev->dev,
				"ARM status %d - Invalid status code\n",
				status);
			break;
		}
		return -EIO;
	}

	/* Read U-Boot version now in case we will later fail */
	goya_read_device_fw_version(hdev, FW_COMP_UBOOT);
	goya_read_device_fw_version(hdev, FW_COMP_PREBOOT);

	if (!hdev->fw_loading) {
		dev_info(hdev->dev, "Skip loading FW\n");
		goto out;
	}

	/* SRAM_AVAIL means Linux is already up - nothing left to load */
	if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
		goto out;

	rc = goya_push_linux_to_device(hdev);
	if (rc)
		return rc;

	/* Signal u-boot that the FIT image is in place */
	WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_FIT_RDY);

	/* Wait for Linux on the device to come up */
	rc = hl_poll_timeout(
		hdev,
		mmPSOC_GLOBAL_CONF_WARM_REBOOT,
		status,
		(status == CPU_BOOT_STATUS_SRAM_AVAIL),
		10000,
		cpu_timeout);

	if (rc) {
		if (status == CPU_BOOT_STATUS_FIT_CORRUPTED)
			dev_err(hdev->dev,
				"ARM u-boot reports FIT image is corrupted\n");
		else
			dev_err(hdev->dev,
				"ARM Linux failed to load, %d\n", status);
		/* Withdraw the "FIT ready" message before bailing out */
		WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_NA);
		return -EIO;
	}

	dev_info(hdev->dev, "Successfully loaded firmware to device\n");

out:
	goya->hw_cap_initialized |= HW_CAP_CPU;

	return 0;
}
2364
/*
 * goya_mmu_update_asid_hop0_addr - Program an ASID's hop-0 page-table address
 *
 * @hdev: pointer to hl_device structure
 * @asid: address-space ID whose hop-0 table is being set
 * @phys_addr: physical address of the hop-0 page table
 *
 * Writes the split physical address (bits 43:12 and 49:44) into the MMU
 * registers, kicks the update with the busy bit (0x80000000) set alongside
 * the ASID, and polls until HW clears the busy bit.
 *
 * Returns 0 on success, negative errno on poll timeout.
 */
static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
						u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	/* PLDM (emulation) needs a much longer timeout than silicon */
	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	/* Bit 31 = busy/kick; HW clears it when the update is done */
	WREG32(MMU_ASID_BUSY, 0x80000000 | asid);

	rc = hl_poll_timeout(
		hdev,
		MMU_ASID_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}
2396
2397 int goya_mmu_init(struct hl_device *hdev)
2398 {
2399         struct asic_fixed_properties *prop = &hdev->asic_prop;
2400         struct goya_device *goya = hdev->asic_specific;
2401         u64 hop0_addr;
2402         int rc, i;
2403
2404         if (!hdev->mmu_enable)
2405                 return 0;
2406
2407         if (goya->hw_cap_initialized & HW_CAP_MMU)
2408                 return 0;
2409
2410         hdev->dram_supports_virtual_memory = true;
2411         hdev->dram_default_page_mapping = true;
2412
2413         for (i = 0 ; i < prop->max_asid ; i++) {
2414                 hop0_addr = prop->mmu_pgt_addr +
2415                                 (i * prop->mmu_hop_table_size);
2416
2417                 rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2418                 if (rc) {
2419                         dev_err(hdev->dev,
2420                                 "failed to set hop0 addr for asid %d\n", i);
2421                         goto err;
2422                 }
2423         }
2424
2425         goya->hw_cap_initialized |= HW_CAP_MMU;
2426
2427         /* init MMU cache manage page */
2428         WREG32(mmSTLB_CACHE_INV_BASE_39_8,
2429                                 lower_32_bits(MMU_CACHE_MNG_ADDR >> 8));
2430         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2431
2432         /* Remove follower feature due to performance bug */
2433         WREG32_AND(mmSTLB_STLB_FEATURE_EN,
2434                         (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
2435
2436         hdev->asic_funcs->mmu_invalidate_cache(hdev, true);
2437
2438         WREG32(mmMMU_MMU_ENABLE, 1);
2439         WREG32(mmMMU_SPI_MASK, 0xF);
2440
2441         return 0;
2442
2443 err:
2444         return rc;
2445 }
2446
/*
 * goya_hw_init - Goya hardware initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Brings up the device in a fixed order: mark the H/W dirty, boot the
 * embedded CPU, apply the TPC MBIST work-around and golden register
 * values, remap the DDR bar to the MMU page tables, initialize the MMU,
 * apply the security configuration, bring up all QMANs and finally
 * enable MSI-X.
 *
 * Returns 0 on success
 *
 */
static int goya_hw_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 val;
	int rc;

	dev_info(hdev->dev, "Starting initialization of H/W\n");

	/* Perform read from the device to make sure device is up */
	val = RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);

	/*
	 * Let's mark in the H/W that we have reached this point. We check
	 * this value in the reset_before_init function to understand whether
	 * we need to reset the chip before doing H/W init. This register is
	 * cleared by the H/W upon H/W reset
	 */
	WREG32(mmPSOC_GLOBAL_CONF_APP_STATUS, HL_DEVICE_HW_STATE_DIRTY);

	rc = goya_init_cpu(hdev, GOYA_CPU_TIMEOUT_USEC);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU\n");
		return rc;
	}

	goya_tpc_mbist_workaround(hdev);

	goya_init_golden_registers(hdev);

	/*
	 * After CPU initialization is finished, change DDR bar mapping inside
	 * iATU to point to the start address of the MMU page tables
	 */
	if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE +
			(MMU_PAGE_TABLES_ADDR &
			~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map DDR bar to MMU page tables\n");
		return -EIO;
	}

	rc = goya_mmu_init(hdev);
	if (rc)
		return rc;

	goya_init_security(hdev);

	goya_init_dma_qmans(hdev);

	goya_init_mme_qmans(hdev);

	goya_init_tpc_qmans(hdev);

	/* MSI-X must be enabled before CPU queues are initialized */
	rc = goya_enable_msix(hdev);
	if (rc)
		goto disable_queues;

	/* Perform read from the device to flush all MSI-X configuration */
	val = RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);

	return 0;

disable_queues:
	goya_disable_internal_queues(hdev);
	goya_disable_external_queues(hdev);

	return rc;
}
2524
/*
 * goya_hw_fini - Goya hardware tear-down code
 *
 * @hdev: pointer to hl_device structure
 * @hard_reset: should we do hard reset to all engines or just reset the
 *              compute/dma engines
 *
 * A hard reset resets the whole chip and re-runs the boot sequencer;
 * a soft reset only resets the DMA/MME/TPC engines.
 */
static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 reset_timeout_ms, status;

	/* Palladium emulation is much slower than real H/W */
	if (hdev->pldm)
		reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
	else
		reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;

	if (hard_reset) {
		/* Map the DDR bar back to the DRAM physical base */
		goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
		goya_disable_clk_rlx(hdev);
		goya_set_pll_refclk(hdev);

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
		dev_info(hdev->dev,
			"Issued HARD reset command, going to wait %dms\n",
			reset_timeout_ms);
	} else {
		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
		dev_info(hdev->dev,
			"Issued SOFT reset command, going to wait %dms\n",
			reset_timeout_ms);
	}

	/*
	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
	 * itself is in reset. In either reset we need to wait until the reset
	 * is deasserted
	 */
	msleep(reset_timeout_ms);

	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
		dev_err(hdev->dev,
			"Timeout while waiting for device to reset 0x%x\n",
			status);

	if (!hard_reset) {
		/* A soft reset only takes down the compute engines */
		goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
						HW_CAP_GOLDEN | HW_CAP_TPC);
		/* Notify the device CPU that a soft reset took place */
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
				GOYA_ASYNC_EVENT_ID_SOFT_RESET);
		return;
	}

	/* Chicken bit to re-initiate boot sequencer flow */
	WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START,
		1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT);
	/* Move boot manager FSM to pre boot sequencer init state */
	WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
			0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);

	goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
					HW_CAP_DDR_0 | HW_CAP_DDR_1 |
					HW_CAP_DMA | HW_CAP_MME |
					HW_CAP_MMU | HW_CAP_TPC_MBIST |
					HW_CAP_GOLDEN | HW_CAP_TPC);
	memset(goya->events_stat, 0, sizeof(goya->events_stat));

	if (!hdev->pldm) {
		int rc;
		/* In case we are running inside VM and the VM is
		 * shutting down, we need to make sure CPU boot-loader
		 * is running before we can continue the VM shutdown.
		 * That is because the VM will send an FLR signal that
		 * we must answer
		 */
		dev_info(hdev->dev,
			"Going to wait up to %ds for CPU boot loader\n",
			GOYA_CPU_TIMEOUT_USEC / 1000 / 1000);

		rc = hl_poll_timeout(
			hdev,
			mmPSOC_GLOBAL_CONF_WARM_REBOOT,
			status,
			(status == CPU_BOOT_STATUS_DRAM_RDY),
			10000,
			GOYA_CPU_TIMEOUT_USEC);
		if (rc)
			dev_err(hdev->dev,
				"failed to wait for CPU boot loader\n");
	}
}
2617
2618 int goya_suspend(struct hl_device *hdev)
2619 {
2620         int rc;
2621
2622         rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
2623         if (rc)
2624                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
2625
2626         return rc;
2627 }
2628
/*
 * goya_resume - resume the device after system suspend
 *
 * @hdev: pointer to hl_device structure
 *
 * Re-initializes the iATU (PCI inbound/outbound address translation),
 * which is lost across suspend.
 *
 * Return: 0 on success, negative error code on failure.
 */
int goya_resume(struct hl_device *hdev)
{
	return goya_init_iatu(hdev);
}
2633
2634 static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
2635                 u64 kaddress, phys_addr_t paddress, u32 size)
2636 {
2637         int rc;
2638
2639         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
2640                         VM_DONTCOPY | VM_NORESERVE;
2641
2642         rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
2643                                 size, vma->vm_page_prot);
2644         if (rc)
2645                 dev_err(hdev->dev, "remap_pfn_range error %d", rc);
2646
2647         return rc;
2648 }
2649
/*
 * goya_ring_doorbell - update a H/W queue's producer index on the device
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: the queue whose doorbell register should be written
 * @pi: the new producer index value
 *
 * Translates the queue ID to its PQ_PI doorbell register and writes the
 * new PI. For the CPU queue, an additional GIC interrupt is raised so the
 * device CPU notices the update.
 */
void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
{
	u32 db_reg_offset, db_value;

	/* Each queue has its own, fixed, doorbell register */
	switch (hw_queue_id) {
	case GOYA_QUEUE_ID_DMA_0:
		db_reg_offset = mmDMA_QM_0_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_1:
		db_reg_offset = mmDMA_QM_1_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_2:
		db_reg_offset = mmDMA_QM_2_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_3:
		db_reg_offset = mmDMA_QM_3_PQ_PI;
		break;

	case GOYA_QUEUE_ID_DMA_4:
		db_reg_offset = mmDMA_QM_4_PQ_PI;
		break;

	case GOYA_QUEUE_ID_CPU_PQ:
		db_reg_offset = mmCPU_IF_PF_PQ_PI;
		break;

	case GOYA_QUEUE_ID_MME:
		db_reg_offset = mmMME_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC0:
		db_reg_offset = mmTPC0_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC1:
		db_reg_offset = mmTPC1_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC2:
		db_reg_offset = mmTPC2_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC3:
		db_reg_offset = mmTPC3_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC4:
		db_reg_offset = mmTPC4_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC5:
		db_reg_offset = mmTPC5_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC6:
		db_reg_offset = mmTPC6_QM_PQ_PI;
		break;

	case GOYA_QUEUE_ID_TPC7:
		db_reg_offset = mmTPC7_QM_PQ_PI;
		break;

	default:
		/* Should never get here */
		dev_err(hdev->dev, "H/W queue %d is invalid. Can't set pi\n",
			hw_queue_id);
		return;
	}

	db_value = pi;

	/* ring the doorbell */
	WREG32(db_reg_offset, db_value);

	/* Raise a GIC interrupt so the device CPU sees the new PI */
	if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ)
		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
				GOYA_ASYNC_EVENT_ID_PI_UPDATE);
}
2731
/*
 * goya_pqe_write - write a buffer descriptor into a PQ entry
 *
 * @hdev: pointer to hl_device structure
 * @pqe: destination PQ entry (lives on the device SRAM)
 * @bd: the buffer descriptor to copy in
 */
void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
{
	/* The QMANs are on the SRAM so need to copy to IO space */
	memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
}
2737
2738 static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
2739                                         dma_addr_t *dma_handle, gfp_t flags)
2740 {
2741         void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
2742                                                 dma_handle, flags);
2743
2744         /* Shift to the device's base physical address of host memory */
2745         if (kernel_addr)
2746                 *dma_handle += HOST_PHYS_BASE;
2747
2748         return kernel_addr;
2749 }
2750
2751 static void goya_dma_free_coherent(struct hl_device *hdev, size_t size,
2752                                         void *cpu_addr, dma_addr_t dma_handle)
2753 {
2754         /* Cancel the device's base physical address of host memory */
2755         dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
2756
2757         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
2758 }
2759
/*
 * goya_get_int_queue_base - get the SRAM base of an internal H/W queue
 *
 * @hdev: pointer to hl_device structure
 * @queue_id: ID of an internal (MME/TPC) queue
 * @dma_handle: filled with the queue's device address inside the SRAM
 * @queue_len: filled with the queue length (number of entries)
 *
 * Return: kernel virtual address of the queue inside the mapped SRAM
 * BAR, or NULL for an invalid queue ID.
 */
void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
				dma_addr_t *dma_handle, u16 *queue_len)
{
	void *base;
	u32 offset;

	*dma_handle = hdev->asic_prop.sram_base_address;

	base = (void *) hdev->pcie_bar[SRAM_CFG_BAR_ID];

	/* Each internal queue lives at a fixed offset inside the SRAM */
	switch (queue_id) {
	case GOYA_QUEUE_ID_MME:
		offset = MME_QMAN_BASE_OFFSET;
		*queue_len = MME_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC0:
		offset = TPC0_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC1:
		offset = TPC1_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC2:
		offset = TPC2_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC3:
		offset = TPC3_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC4:
		offset = TPC4_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC5:
		offset = TPC5_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC6:
		offset = TPC6_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	case GOYA_QUEUE_ID_TPC7:
		offset = TPC7_QMAN_BASE_OFFSET;
		*queue_len = TPC_QMAN_LENGTH;
		break;
	default:
		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
		return NULL;
	}

	/* Apply the same offset to both the host and device views */
	base += offset;
	*dma_handle += offset;

	return base;
}
2817
/*
 * goya_send_job_on_qman0 - synchronously run a KMD job on DMA QMAN 0
 *
 * @hdev: pointer to hl_device structure
 * @job: the job whose patched CB should be executed
 *
 * Sends the job's patched CB on QMAN 0 and busy-waits on a fence value
 * that the CB's trailing MSG_PROT packet writes upon completion. QMAN 0
 * security is lifted for the duration of the job since the CB ends with
 * a MSG_PROT packet. The device must be idle.
 *
 * Return: 0 on success, -EBUSY if the device is not idle, -ENOMEM on
 * allocation failure, -ETIMEDOUT (or other error) from the fence poll.
 */
static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct packet_msg_prot *fence_pkt;
	u32 *fence_ptr;
	dma_addr_t fence_dma_addr;
	struct hl_cb *cb;
	u32 tmp, timeout;
	int rc;

	/* Palladium emulation is much slower than real H/W */
	if (hdev->pldm)
		timeout = GOYA_PLDM_QMAN0_TIMEOUT_USEC;
	else
		timeout = HL_DEVICE_TIMEOUT_USEC;

	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
		dev_err_ratelimited(hdev->dev,
			"Can't send KMD job on QMAN0 because the device is not idle\n");
		return -EBUSY;
	}

	/* 4-byte completion fence the CB will write to when done */
	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
							&fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate fence memory for QMAN0\n");
		return -ENOMEM;
	}

	goya_qman0_set_security(hdev, true);

	cb = job->patched_cb;

	/* The fence MSG_PROT packet is the last packet of the patched CB */
	fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
			job->job_cb_size - sizeof(struct packet_msg_prot));

	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(GOYA_QMAN0_FENCE_VAL);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
					job->job_cb_size, cb->bus_address);
	if (rc) {
		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
		goto free_fence_ptr;
	}

	/* Busy-wait until the fence value is written by the CB */
	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
				(tmp == GOYA_QMAN0_FENCE_VAL), 1000,
				timeout, true);

	hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
		goto free_fence_ptr;
	}

	/* Note: success also falls through to the cleanup below */
free_fence_ptr:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
					fence_dma_addr);

	goya_qman0_set_security(hdev, false);

	return rc;
}
2886
2887 int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
2888                                 u32 timeout, long *result)
2889 {
2890         struct goya_device *goya = hdev->asic_specific;
2891
2892         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
2893                 if (result)
2894                         *result = 0;
2895                 return 0;
2896         }
2897
2898         return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
2899                                         timeout, result);
2900 }
2901
/*
 * goya_test_queue - sanity-test a single external H/W queue
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: the queue to test
 *
 * Sends a MSG_PROT packet on the queue that writes a known fence value
 * to host memory, then polls that memory until the value appears.
 *
 * Return: 0 on success, -ENOMEM on allocation failure, -EIO if the
 * fence value was not observed in time.
 */
int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
{
	struct packet_msg_prot *fence_pkt;
	dma_addr_t pkt_dma_addr;
	u32 fence_val, tmp;
	dma_addr_t fence_dma_addr;
	u32 *fence_ptr;
	int rc;

	fence_val = GOYA_QMAN0_FENCE_VAL;

	/* 4-byte host location the packet will write the fence value to */
	fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
							&fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate memory for queue testing\n");
		return -ENOMEM;
	}

	*fence_ptr = 0;

	fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
					sizeof(struct packet_msg_prot),
					GFP_KERNEL, &pkt_dma_addr);
	if (!fence_pkt) {
		dev_err(hdev->dev,
			"Failed to allocate packet for queue testing\n");
		rc = -ENOMEM;
		goto free_fence_ptr;
	}

	/* MSG_PROT packet that writes fence_val to fence_dma_addr */
	tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
			(1 << GOYA_PKT_CTL_EB_SHIFT) |
			(1 << GOYA_PKT_CTL_MB_SHIFT);
	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(fence_val);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
					sizeof(struct packet_msg_prot),
					pkt_dma_addr);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to send fence packet\n");
		goto free_pkt;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
					1000, GOYA_TEST_QUEUE_WAIT_USEC, true);

	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev,
			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
		rc = -EIO;
	} else {
		dev_info(hdev->dev, "queue test on H/W queue %d succeeded\n",
			hw_queue_id);
	}

free_pkt:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
					pkt_dma_addr);
free_fence_ptr:
	hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
					fence_dma_addr);
	return rc;
}
2972
2973 int goya_test_cpu_queue(struct hl_device *hdev)
2974 {
2975         struct goya_device *goya = hdev->asic_specific;
2976
2977         /*
2978          * check capability here as send_cpu_message() won't update the result
2979          * value if no capability
2980          */
2981         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
2982                 return 0;
2983
2984         return hl_fw_test_cpu_queue(hdev);
2985 }
2986
2987 int goya_test_queues(struct hl_device *hdev)
2988 {
2989         int i, rc, ret_val = 0;
2990
2991         for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
2992                 rc = goya_test_queue(hdev, i);
2993                 if (rc)
2994                         ret_val = -EINVAL;
2995         }
2996
2997         return ret_val;
2998 }
2999
3000 static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3001                                         gfp_t mem_flags, dma_addr_t *dma_handle)
3002 {
3003         void *kernel_addr;
3004
3005         if (size > GOYA_DMA_POOL_BLK_SIZE)
3006                 return NULL;
3007
3008         kernel_addr =  dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3009
3010         /* Shift to the device's base physical address of host memory */
3011         if (kernel_addr)
3012                 *dma_handle += HOST_PHYS_BASE;
3013
3014         return kernel_addr;
3015 }
3016
3017 static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
3018                                 dma_addr_t dma_addr)
3019 {
3020         /* Cancel the device's base physical address of host memory */
3021         dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3022
3023         dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3024 }
3025
3026 void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
3027                                         dma_addr_t *dma_handle)
3028 {
3029         void *vaddr;
3030
3031         vaddr = hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3032         *dma_handle = (*dma_handle) - hdev->cpu_accessible_dma_address +
3033                         VA_CPU_ACCESSIBLE_MEM_ADDR;
3034
3035         return vaddr;
3036 }
3037
/*
 * goya_cpu_accessible_dma_pool_free - free a CPU-accessible pool allocation
 *
 * @hdev: pointer to hl_device structure
 * @size: size of the original allocation
 * @vaddr: kernel virtual address of the allocation
 */
void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
					void *vaddr)
{
	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
3043
3044 static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3045                                 int nents, enum dma_data_direction dir)
3046 {
3047         struct scatterlist *sg;
3048         int i;
3049
3050         if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3051                 return -ENOMEM;
3052
3053         /* Shift to the device's base physical address of host memory */
3054         for_each_sg(sgl, sg, nents, i)
3055                 sg->dma_address += HOST_PHYS_BASE;
3056
3057         return 0;
3058 }
3059
3060 static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3061                                 int nents, enum dma_data_direction dir)
3062 {
3063         struct scatterlist *sg;
3064         int i;
3065
3066         /* Cancel the device's base physical address of host memory */
3067         for_each_sg(sgl, sg, nents, i)
3068                 sg->dma_address -= HOST_PHYS_BASE;
3069
3070         dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3071 }
3072
/*
 * goya_get_dma_desc_list_size - size needed for the patched DMA packets
 *
 * @hdev: pointer to hl_device structure
 * @sgt: the DMA-mapped scatter-gather table of the user memory
 *
 * Counts how many LIN_DMA packets are needed to cover the SG table,
 * merging physically contiguous entries as long as the combined length
 * stays within DMA_MAX_TRANSFER_SIZE.
 *
 * Return: total size in bytes of the required LIN_DMA packets.
 */
u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
{
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t addr, addr_next;

	dma_desc_cnt = 0;

	for_each_sg(sgt->sgl, sg, sgt->nents, count) {

		len = sg_dma_len(sg);
		addr = sg_dma_address(sg);

		/* A zero-length entry marks the end of the mapped list */
		if (len == 0)
			break;

		/* Merge following entries that are physically contiguous
		 * with the current one, up to the DMA engine's transfer
		 * size limit
		 */
		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((addr + len == addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		/* One LIN_DMA packet covers each merged chunk */
		dma_desc_cnt++;
	}

	return dma_desc_cnt * sizeof(struct packet_lin_dma);
}
3113
/*
 * goya_pin_memory_before_cs - pin the host memory of a user DMA packet
 *
 * @hdev: pointer to hl_device structure
 * @parser: the CS parser state; its patched CB size is grown by the
 *          space the resulting LIN_DMA packets will need
 * @user_dma_pkt: the user's LIN_DMA packet being validated
 * @addr: host virtual address referenced by the packet
 * @dir: DMA transfer direction
 *
 * If the region is already pinned for this job it is reused; otherwise
 * it is pinned, DMA-mapped and added to the job's userptr list so it can
 * be released when the job completes.
 *
 * Return: 0 on success, negative error code on failure.
 */
static int goya_pin_memory_before_cs(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				u64 addr, enum dma_data_direction dir)
{
	struct hl_userptr *userptr;
	int rc;

	/* Reuse an existing pinning of the same range, if any */
	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
			parser->job_userptr_list, &userptr))
		goto already_pinned;

	/* GFP_ATOMIC: presumably this path may not sleep - TODO confirm */
	userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
	if (!userptr)
		return -ENOMEM;

	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
				userptr);
	if (rc)
		goto free_userptr;

	list_add_tail(&userptr->job_node, parser->job_userptr_list);

	rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
					userptr->sgt->nents, dir);
	if (rc) {
		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
		goto unpin_memory;
	}

	userptr->dma_mapped = true;
	userptr->dir = dir;

already_pinned:
	/* Account for the LIN_DMA packets this SG table will expand into */
	parser->patched_cb_size +=
			goya_get_dma_desc_list_size(hdev, userptr->sgt);

	return 0;

unpin_memory:
	hl_unpin_host_memory(hdev, userptr);
free_userptr:
	kfree(userptr);
	return rc;
}
3159
3160 static int goya_validate_dma_pkt_host(struct hl_device *hdev,
3161                                 struct hl_cs_parser *parser,
3162                                 struct packet_lin_dma *user_dma_pkt)
3163 {
3164         u64 device_memory_addr, addr;
3165         enum dma_data_direction dir;
3166         enum goya_dma_direction user_dir;
3167         bool sram_addr = true;
3168         bool skip_host_mem_pin = false;
3169         bool user_memset;
3170         u32 ctl;
3171         int rc = 0;
3172
3173         ctl = le32_to_cpu(user_dma_pkt->ctl);
3174
3175         user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3176                         GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3177
3178         user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3179                         GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3180
3181         switch (user_dir) {
3182         case DMA_HOST_TO_DRAM:
3183                 dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
3184                 dir = DMA_TO_DEVICE;
3185                 sram_addr = false;
3186                 addr = le64_to_cpu(user_dma_pkt->src_addr);
3187                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3188                 if (user_memset)
3189                         skip_host_mem_pin = true;
3190                 break;
3191
3192         case DMA_DRAM_TO_HOST:
3193                 dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
3194                 dir = DMA_FROM_DEVICE;
3195                 sram_addr = false;
3196                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3197                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3198                 break;
3199
3200         case DMA_HOST_TO_SRAM:
3201                 dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
3202                 dir = DMA_TO_DEVICE;
3203                 addr = le64_to_cpu(user_dma_pkt->src_addr);
3204                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3205                 if (user_memset)
3206                         skip_host_mem_pin = true;
3207                 break;
3208
3209         case DMA_SRAM_TO_HOST:
3210                 dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
3211                 dir = DMA_FROM_DEVICE;
3212                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3213                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3214                 break;
3215         default:
3216                 dev_err(hdev->dev, "DMA direction is undefined\n");
3217                 return -EFAULT;
3218         }
3219
3220         if (sram_addr) {
3221                 if (!hl_mem_area_inside_range(device_memory_addr,
3222                                 le32_to_cpu(user_dma_pkt->tsize),
3223                                 hdev->asic_prop.sram_user_base_address,
3224                                 hdev->asic_prop.sram_end_address)) {
3225
3226                         dev_err(hdev->dev,
3227                                 "SRAM address 0x%llx + 0x%x is invalid\n",
3228                                 device_memory_addr,
3229                                 user_dma_pkt->tsize);
3230                         return -EFAULT;
3231                 }
3232         } else {
3233                 if (!hl_mem_area_inside_range(device_memory_addr,
3234                                 le32_to_cpu(user_dma_pkt->tsize),
3235                                 hdev->asic_prop.dram_user_base_address,
3236                                 hdev->asic_prop.dram_end_address)) {
3237
3238                         dev_err(hdev->dev,
3239                                 "DRAM address 0x%llx + 0x%x is invalid\n",
3240                                 device_memory_addr,
3241                                 user_dma_pkt->tsize);
3242                         return -EFAULT;
3243                 }
3244         }
3245
3246         if (skip_host_mem_pin)
3247                 parser->patched_cb_size += sizeof(*user_dma_pkt);
3248         else {
3249                 if ((dir == DMA_TO_DEVICE) &&
3250                                 (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
3251                         dev_err(hdev->dev,
3252                                 "Can't DMA from host on queue other then 1\n");
3253                         return -EFAULT;
3254                 }
3255
3256                 rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3257                                                 addr, dir);
3258         }
3259
3260         return rc;
3261 }
3262
3263 static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
3264                                 struct hl_cs_parser *parser,
3265                                 struct packet_lin_dma *user_dma_pkt)
3266 {
3267         u64 sram_memory_addr, dram_memory_addr;
3268         enum goya_dma_direction user_dir;
3269         u32 ctl;
3270
3271         ctl = le32_to_cpu(user_dma_pkt->ctl);
3272         user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3273                         GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3274
3275         if (user_dir == DMA_DRAM_TO_SRAM) {
3276                 dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
3277                 dram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3278                 sram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3279         } else {
3280                 dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
3281                 sram_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3282                 dram_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3283         }
3284
3285         if (!hl_mem_area_inside_range(sram_memory_addr,
3286                                 le32_to_cpu(user_dma_pkt->tsize),
3287                                 hdev->asic_prop.sram_user_base_address,
3288                                 hdev->asic_prop.sram_end_address)) {
3289                 dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
3290                         sram_memory_addr, user_dma_pkt->tsize);
3291                 return -EFAULT;
3292         }
3293
3294         if (!hl_mem_area_inside_range(dram_memory_addr,
3295                                 le32_to_cpu(user_dma_pkt->tsize),
3296                                 hdev->asic_prop.dram_user_base_address,
3297                                 hdev->asic_prop.dram_end_address)) {
3298                 dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
3299                         dram_memory_addr, user_dma_pkt->tsize);
3300                 return -EFAULT;
3301         }
3302
3303         parser->patched_cb_size += sizeof(*user_dma_pkt);
3304
3305         return 0;
3306 }
3307
3308 static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3309                                 struct hl_cs_parser *parser,
3310                                 struct packet_lin_dma *user_dma_pkt)
3311 {
3312         enum goya_dma_direction user_dir;
3313         u32 ctl;
3314         int rc;
3315
3316         dev_dbg(hdev->dev, "DMA packet details:\n");
3317         dev_dbg(hdev->dev, "source == 0x%llx\n",
3318                 le64_to_cpu(user_dma_pkt->src_addr));
3319         dev_dbg(hdev->dev, "destination == 0x%llx\n",
3320                 le64_to_cpu(user_dma_pkt->dst_addr));
3321         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3322
3323         ctl = le32_to_cpu(user_dma_pkt->ctl);
3324         user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3325                         GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3326
3327         /*
3328          * Special handling for DMA with size 0. The H/W has a bug where
3329          * this can cause the QMAN DMA to get stuck, so block it here.
3330          */
3331         if (user_dma_pkt->tsize == 0) {
3332                 dev_err(hdev->dev,
3333                         "Got DMA with size 0, might reset the device\n");
3334                 return -EINVAL;
3335         }
3336
3337         if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM))
3338                 rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
3339         else
3340                 rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);
3341
3342         return rc;
3343 }
3344
3345 static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
3346                                 struct hl_cs_parser *parser,
3347                                 struct packet_lin_dma *user_dma_pkt)
3348 {
3349         dev_dbg(hdev->dev, "DMA packet details:\n");
3350         dev_dbg(hdev->dev, "source == 0x%llx\n",
3351                 le64_to_cpu(user_dma_pkt->src_addr));
3352         dev_dbg(hdev->dev, "destination == 0x%llx\n",
3353                 le64_to_cpu(user_dma_pkt->dst_addr));
3354         dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3355
3356         /*
3357          * WA for HW-23.
3358          * We can't allow user to read from Host using QMANs other than 1.
3359          */
3360         if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
3361                 hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
3362                                 le32_to_cpu(user_dma_pkt->tsize),
3363                                 hdev->asic_prop.va_space_host_start_address,
3364                                 hdev->asic_prop.va_space_host_end_address)) {
3365                 dev_err(hdev->dev,
3366                         "Can't DMA from host on queue other then 1\n");
3367                 return -EFAULT;
3368         }
3369
3370         if (user_dma_pkt->tsize == 0) {
3371                 dev_err(hdev->dev,
3372                         "Got DMA with size 0, might reset the device\n");
3373                 return -EINVAL;
3374         }
3375
3376         parser->patched_cb_size += sizeof(*user_dma_pkt);
3377
3378         return 0;
3379 }
3380
3381 static int goya_validate_wreg32(struct hl_device *hdev,
3382                                 struct hl_cs_parser *parser,
3383                                 struct packet_wreg32 *wreg_pkt)
3384 {
3385         struct goya_device *goya = hdev->asic_specific;
3386         u32 sob_start_addr, sob_end_addr;
3387         u16 reg_offset;
3388
3389         reg_offset = le32_to_cpu(wreg_pkt->ctl) &
3390                         GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;
3391
3392         dev_dbg(hdev->dev, "WREG32 packet details:\n");
3393         dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
3394         dev_dbg(hdev->dev, "value      == 0x%x\n",
3395                 le32_to_cpu(wreg_pkt->value));
3396
3397         if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) {
3398                 dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
3399                         reg_offset);
3400                 return -EPERM;
3401         }
3402
3403         /*
3404          * With MMU, DMA channels are not secured, so it doesn't matter where
3405          * the WR COMP will be written to because it will go out with
3406          * non-secured property
3407          */
3408         if (goya->hw_cap_initialized & HW_CAP_MMU)
3409                 return 0;
3410
3411         sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
3412         sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);
3413
3414         if ((le32_to_cpu(wreg_pkt->value) < sob_start_addr) ||
3415                         (le32_to_cpu(wreg_pkt->value) > sob_end_addr)) {
3416
3417                 dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
3418                         wreg_pkt->value);
3419                 return -EPERM;
3420         }
3421
3422         return 0;
3423 }
3424
3425 static int goya_validate_cb(struct hl_device *hdev,
3426                         struct hl_cs_parser *parser, bool is_mmu)
3427 {
3428         u32 cb_parsed_length = 0;
3429         int rc = 0;
3430
3431         parser->patched_cb_size = 0;
3432
3433         /* cb_user_size is more than 0 so loop will always be executed */
3434         while (cb_parsed_length < parser->user_cb_size) {
3435                 enum packet_id pkt_id;
3436                 u16 pkt_size;
3437                 struct goya_packet *user_pkt;
3438
3439                 user_pkt = (struct goya_packet *) (uintptr_t)
3440                         (parser->user_cb->kernel_address + cb_parsed_length);
3441
3442                 pkt_id = (enum packet_id) (
3443                                 (le64_to_cpu(user_pkt->header) &
3444                                 PACKET_HEADER_PACKET_ID_MASK) >>
3445                                         PACKET_HEADER_PACKET_ID_SHIFT);
3446
3447                 pkt_size = goya_packet_sizes[pkt_id];
3448                 cb_parsed_length += pkt_size;
3449                 if (cb_parsed_length > parser->user_cb_size) {
3450                         dev_err(hdev->dev,
3451                                 "packet 0x%x is out of CB boundary\n", pkt_id);
3452                         rc = -EINVAL;
3453                         break;
3454                 }
3455
3456                 switch (pkt_id) {
3457                 case PACKET_WREG_32:
3458                         /*
3459                          * Although it is validated after copy in patch_cb(),
3460                          * need to validate here as well because patch_cb() is
3461                          * not called in MMU path while this function is called
3462                          */
3463                         rc = goya_validate_wreg32(hdev,
3464                                 parser, (struct packet_wreg32 *) user_pkt);
3465                         break;
3466
3467                 case PACKET_WREG_BULK:
3468                         dev_err(hdev->dev,
3469                                 "User not allowed to use WREG_BULK\n");
3470                         rc = -EPERM;
3471                         break;
3472
3473                 case PACKET_MSG_PROT:
3474                         dev_err(hdev->dev,
3475                                 "User not allowed to use MSG_PROT\n");
3476                         rc = -EPERM;
3477                         break;
3478
3479                 case PACKET_CP_DMA:
3480                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3481                         rc = -EPERM;
3482                         break;
3483
3484                 case PACKET_STOP:
3485                         dev_err(hdev->dev, "User not allowed to use STOP\n");
3486                         rc = -EPERM;
3487                         break;
3488
3489                 case PACKET_LIN_DMA:
3490                         if (is_mmu)
3491                                 rc = goya_validate_dma_pkt_mmu(hdev, parser,
3492                                         (struct packet_lin_dma *) user_pkt);
3493                         else
3494                                 rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
3495                                         (struct packet_lin_dma *) user_pkt);
3496                         break;
3497
3498                 case PACKET_MSG_LONG:
3499                 case PACKET_MSG_SHORT:
3500                 case PACKET_FENCE:
3501                 case PACKET_NOP:
3502                         parser->patched_cb_size += pkt_size;
3503                         break;
3504
3505                 default:
3506                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3507                                 pkt_id);
3508                         rc = -EINVAL;
3509                         break;
3510                 }
3511
3512                 if (rc)
3513                         break;
3514         }
3515
3516         /*
3517          * The new CB should have space at the end for two MSG_PROT packets:
3518          * 1. A packet that will act as a completion packet
3519          * 2. A packet that will generate MSI-X interrupt
3520          */
3521         parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3522
3523         return rc;
3524 }
3525
3526 static int goya_patch_dma_packet(struct hl_device *hdev,
3527                                 struct hl_cs_parser *parser,
3528                                 struct packet_lin_dma *user_dma_pkt,
3529                                 struct packet_lin_dma *new_dma_pkt,
3530                                 u32 *new_dma_pkt_size)
3531 {
3532         struct hl_userptr *userptr;
3533         struct scatterlist *sg, *sg_next_iter;
3534         u32 count, dma_desc_cnt;
3535         u64 len, len_next;
3536         dma_addr_t dma_addr, dma_addr_next;
3537         enum goya_dma_direction user_dir;
3538         u64 device_memory_addr, addr;
3539         enum dma_data_direction dir;
3540         struct sg_table *sgt;
3541         bool skip_host_mem_pin = false;
3542         bool user_memset;
3543         u32 user_rdcomp_mask, user_wrcomp_mask, ctl;
3544
3545         ctl = le32_to_cpu(user_dma_pkt->ctl);
3546
3547         user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3548                         GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3549
3550         user_memset = (ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3551                         GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3552
3553         if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM) ||
3554                         (user_dma_pkt->tsize == 0)) {
3555                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
3556                 *new_dma_pkt_size = sizeof(*new_dma_pkt);
3557                 return 0;
3558         }
3559
3560         if ((user_dir == DMA_HOST_TO_DRAM) || (user_dir == DMA_HOST_TO_SRAM)) {
3561                 addr = le64_to_cpu(user_dma_pkt->src_addr);
3562                 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3563                 dir = DMA_TO_DEVICE;
3564                 if (user_memset)
3565                         skip_host_mem_pin = true;
3566         } else {
3567                 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3568                 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3569                 dir = DMA_FROM_DEVICE;
3570         }
3571
3572         if ((!skip_host_mem_pin) &&
3573                 (hl_userptr_is_pinned(hdev, addr,
3574                         le32_to_cpu(user_dma_pkt->tsize),
3575                         parser->job_userptr_list, &userptr) == false)) {
3576                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3577                                 addr, user_dma_pkt->tsize);
3578                 return -EFAULT;
3579         }
3580
3581         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3582                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3583                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
3584                 return 0;
3585         }
3586
3587         user_rdcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK;
3588
3589         user_wrcomp_mask = ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK;
3590
3591         sgt = userptr->sgt;
3592         dma_desc_cnt = 0;
3593
3594         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3595                 len = sg_dma_len(sg);
3596                 dma_addr = sg_dma_address(sg);
3597
3598                 if (len == 0)
3599                         break;
3600
3601                 while ((count + 1) < sgt->nents) {
3602                         sg_next_iter = sg_next(sg);
3603                         len_next = sg_dma_len(sg_next_iter);
3604                         dma_addr_next = sg_dma_address(sg_next_iter);
3605
3606                         if (len_next == 0)
3607                                 break;
3608
3609                         if ((dma_addr + len == dma_addr_next) &&
3610                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3611                                 len += len_next;
3612                                 count++;
3613                                 sg = sg_next_iter;
3614                         } else {
3615                                 break;
3616                         }
3617                 }
3618
3619                 ctl = le32_to_cpu(user_dma_pkt->ctl);
3620                 if (likely(dma_desc_cnt))
3621                         ctl &= ~GOYA_PKT_CTL_EB_MASK;
3622                 ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
3623                                 GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
3624                 new_dma_pkt->ctl = cpu_to_le32(ctl);
3625                 new_dma_pkt->tsize = cpu_to_le32((u32) len);
3626
3627                 if (dir == DMA_TO_DEVICE) {
3628                         new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3629                         new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3630                 } else {
3631                         new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3632                         new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3633                 }
3634
3635                 if (!user_memset)
3636                         device_memory_addr += len;
3637                 dma_desc_cnt++;
3638                 new_dma_pkt++;
3639         }
3640
3641         if (!dma_desc_cnt) {
3642                 dev_err(hdev->dev,
3643                         "Error of 0 SG entries when patching DMA packet\n");
3644                 return -EFAULT;
3645         }
3646
3647         /* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
3648         new_dma_pkt--;
3649         new_dma_pkt->ctl |= cpu_to_le32(user_rdcomp_mask | user_wrcomp_mask);
3650
3651         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
3652
3653         return 0;
3654 }
3655
3656 static int goya_patch_cb(struct hl_device *hdev,
3657                                 struct hl_cs_parser *parser)
3658 {
3659         u32 cb_parsed_length = 0;
3660         u32 cb_patched_cur_length = 0;
3661         int rc = 0;
3662
3663         /* cb_user_size is more than 0 so loop will always be executed */
3664         while (cb_parsed_length < parser->user_cb_size) {
3665                 enum packet_id pkt_id;
3666                 u16 pkt_size;
3667                 u32 new_pkt_size = 0;
3668                 struct goya_packet *user_pkt, *kernel_pkt;
3669
3670                 user_pkt = (struct goya_packet *) (uintptr_t)
3671                         (parser->user_cb->kernel_address + cb_parsed_length);
3672                 kernel_pkt = (struct goya_packet *) (uintptr_t)
3673                         (parser->patched_cb->kernel_address +
3674                                         cb_patched_cur_length);
3675
3676                 pkt_id = (enum packet_id) (
3677                                 (le64_to_cpu(user_pkt->header) &
3678                                 PACKET_HEADER_PACKET_ID_MASK) >>
3679                                         PACKET_HEADER_PACKET_ID_SHIFT);
3680
3681                 pkt_size = goya_packet_sizes[pkt_id];
3682                 cb_parsed_length += pkt_size;
3683                 if (cb_parsed_length > parser->user_cb_size) {
3684                         dev_err(hdev->dev,
3685                                 "packet 0x%x is out of CB boundary\n", pkt_id);
3686                         rc = -EINVAL;
3687                         break;
3688                 }
3689
3690                 switch (pkt_id) {
3691                 case PACKET_LIN_DMA:
3692                         rc = goya_patch_dma_packet(hdev, parser,
3693                                         (struct packet_lin_dma *) user_pkt,
3694                                         (struct packet_lin_dma *) kernel_pkt,
3695                                         &new_pkt_size);
3696                         cb_patched_cur_length += new_pkt_size;
3697                         break;
3698
3699                 case PACKET_WREG_32:
3700                         memcpy(kernel_pkt, user_pkt, pkt_size);
3701                         cb_patched_cur_length += pkt_size;
3702                         rc = goya_validate_wreg32(hdev, parser,
3703                                         (struct packet_wreg32 *) kernel_pkt);
3704                         break;
3705
3706                 case PACKET_WREG_BULK:
3707                         dev_err(hdev->dev,
3708                                 "User not allowed to use WREG_BULK\n");
3709                         rc = -EPERM;
3710                         break;
3711
3712                 case PACKET_MSG_PROT:
3713                         dev_err(hdev->dev,
3714                                 "User not allowed to use MSG_PROT\n");
3715                         rc = -EPERM;
3716                         break;
3717
3718                 case PACKET_CP_DMA:
3719                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3720                         rc = -EPERM;
3721                         break;
3722
3723                 case PACKET_STOP:
3724                         dev_err(hdev->dev, "User not allowed to use STOP\n");
3725                         rc = -EPERM;
3726                         break;
3727
3728                 case PACKET_MSG_LONG:
3729                 case PACKET_MSG_SHORT:
3730                 case PACKET_FENCE:
3731                 case PACKET_NOP:
3732                         memcpy(kernel_pkt, user_pkt, pkt_size);
3733                         cb_patched_cur_length += pkt_size;
3734                         break;
3735
3736                 default:
3737                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3738                                 pkt_id);
3739                         rc = -EINVAL;
3740                         break;
3741                 }
3742
3743                 if (rc)
3744                         break;
3745         }
3746
3747         return rc;
3748 }
3749
/*
 * goya_parse_cb_mmu - parse a user CB when the device MMU is enabled
 *
 * @hdev: pointer to hl_device structure
 * @parser: the CS parser state
 *
 * Allocates a kernel-owned "patched" CB sized for the user CB plus the two
 * trailing MSG_PROT packets, copies the user CB into it and validates the
 * copy in place (no packet rewriting is needed in MMU mode).
 * Returns 0 on success, negative errno otherwise.
 */
static int goya_parse_cb_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 patched_cb_handle;
	u32 patched_cb_size;
	struct hl_cb *user_cb;
	int rc;

	/*
	 * The new CB should have space at the end for two MSG_PROT pkt:
	 * 1. A packet that will act as a completion packet
	 * 2. A packet that will generate MSI-X interrupt
	 */
	parser->patched_cb_size = parser->user_cb_size +
			sizeof(struct packet_msg_prot) * 2;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
				parser->patched_cb_size,
				&patched_cb_handle, HL_KERNEL_ASID_ID);

	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n",
			rc);
		return rc;
	}

	/* the CB manager keys its IDR by the handle's page-frame number */
	patched_cb_handle >>= PAGE_SHIFT;
	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
				(u32) patched_cb_handle);
	/* hl_cb_get should never fail here so use kernel WARN */
	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
			(u32) patched_cb_handle);
	if (!parser->patched_cb) {
		rc = -EFAULT;
		goto out;
	}

	/*
	 * The check that parser->user_cb_size <= parser->user_cb->size was done
	 * in validate_queue_index().
	 */
	memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
		(void *) (uintptr_t) parser->user_cb->kernel_address,
		parser->user_cb_size);

	patched_cb_size = parser->patched_cb_size;

	/* validate patched CB instead of user CB */
	user_cb = parser->user_cb;
	parser->user_cb = parser->patched_cb;
	rc = goya_validate_cb(hdev, parser, true);
	parser->user_cb = user_cb;

	if (rc) {
		hl_cb_put(parser->patched_cb);
		goto out;
	}

	/* in MMU mode validation must not change the computed CB size */
	if (patched_cb_size != parser->patched_cb_size) {
		dev_err(hdev->dev, "user CB size mismatch\n");
		hl_cb_put(parser->patched_cb);
		rc = -EINVAL;
		goto out;
	}

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job will be completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
					patched_cb_handle << PAGE_SHIFT);

	return rc;
}
3828
/*
 * goya_parse_cb_no_mmu - parse a user CB when the device MMU is disabled
 *
 * @hdev: pointer to hl_device structure
 * @parser: the CS parser state
 *
 * First validates the user CB (which also computes the required patched CB
 * size), then allocates the patched CB and rewrites the user packets into
 * it. On any failure, the job's pinned userptr list is released.
 * Returns 0 on success, negative errno otherwise.
 */
static int goya_parse_cb_no_mmu(struct hl_device *hdev,
				struct hl_cs_parser *parser)
{
	u64 patched_cb_handle;
	int rc;

	rc = goya_validate_cb(hdev, parser, false);

	if (rc)
		goto free_userptr;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
				parser->patched_cb_size,
				&patched_cb_handle, HL_KERNEL_ASID_ID);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n", rc);
		goto free_userptr;
	}

	/* the CB manager keys its IDR by the handle's page-frame number */
	patched_cb_handle >>= PAGE_SHIFT;
	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
				(u32) patched_cb_handle);
	/* hl_cb_get should never fail here so use kernel WARN */
	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
			(u32) patched_cb_handle);
	if (!parser->patched_cb) {
		rc = -EFAULT;
		goto out;
	}

	rc = goya_patch_cb(hdev, parser);

	if (rc)
		hl_cb_put(parser->patched_cb);

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job will be completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
				patched_cb_handle << PAGE_SHIFT);

free_userptr:
	if (rc)
		hl_userptr_delete_list(hdev, parser->job_userptr_list);
	return rc;
}
3880
3881 static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
3882                                         struct hl_cs_parser *parser)
3883 {
3884         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
3885         struct goya_device *goya = hdev->asic_specific;
3886
3887         if (goya->hw_cap_initialized & HW_CAP_MMU)
3888                 return 0;
3889
3890         /* For internal queue jobs, just check if CB address is valid */
3891         if (hl_mem_area_inside_range(
3892                         (u64) (uintptr_t) parser->user_cb,
3893                         parser->user_cb_size,
3894                         asic_prop->sram_user_base_address,
3895                         asic_prop->sram_end_address))
3896                 return 0;
3897
3898         if (hl_mem_area_inside_range(
3899                         (u64) (uintptr_t) parser->user_cb,
3900                         parser->user_cb_size,
3901                         asic_prop->dram_user_base_address,
3902                         asic_prop->dram_end_address))
3903                 return 0;
3904
3905         dev_err(hdev->dev,
3906                 "Internal CB address %px + 0x%x is not in SRAM nor in DRAM\n",
3907                 parser->user_cb, parser->user_cb_size);
3908
3909         return -EFAULT;
3910 }
3911
3912 int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
3913 {
3914         struct goya_device *goya = hdev->asic_specific;
3915
3916         if (!parser->ext_queue)
3917                 return goya_parse_cb_no_ext_queue(hdev, parser);
3918
3919         if (goya->hw_cap_initialized & HW_CAP_MMU)
3920                 return goya_parse_cb_mmu(hdev, parser);
3921         else
3922                 return goya_parse_cb_no_mmu(hdev, parser);
3923 }
3924
3925 void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
3926                                 u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec)
3927 {
3928         struct packet_msg_prot *cq_pkt;
3929         u32 tmp;
3930
3931         cq_pkt = (struct packet_msg_prot *) (uintptr_t)
3932                 (kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
3933
3934         tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3935                         (1 << GOYA_PKT_CTL_EB_SHIFT) |
3936                         (1 << GOYA_PKT_CTL_MB_SHIFT);
3937         cq_pkt->ctl = cpu_to_le32(tmp);
3938         cq_pkt->value = cpu_to_le32(cq_val);
3939         cq_pkt->addr = cpu_to_le64(cq_addr);
3940
3941         cq_pkt++;
3942
3943         tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3944                         (1 << GOYA_PKT_CTL_MB_SHIFT);
3945         cq_pkt->ctl = cpu_to_le32(tmp);
3946         cq_pkt->value = cpu_to_le32(msix_vec & 0x7FF);
3947         cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF);
3948 }
3949
/*
 * goya_update_eq_ci - publish the event queue consumer index
 * @hdev: pointer to hl_device structure
 * @val: new consumer index
 *
 * The CI is written to a scratchpad register; presumably the device/F-W
 * side reads it back from there - confirm against the F/W interface spec.
 */
void goya_update_eq_ci(struct hl_device *hdev, u32 val)
{
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, val);
}
3954
/*
 * goya_restore_phase_topology - intentionally empty on Goya
 * @hdev: pointer to hl_device structure
 *
 * Kept as a no-op, presumably to satisfy a common ASIC callback interface -
 * confirm against the asic_funcs ops table before removing.
 */
void goya_restore_phase_topology(struct hl_device *hdev)
{

}
3959
3960 static void goya_clear_sm_regs(struct hl_device *hdev)
3961 {
3962         int i, num_of_sob_in_longs, num_of_mon_in_longs;
3963
3964         num_of_sob_in_longs =
3965                 ((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4);
3966
3967         num_of_mon_in_longs =
3968                 ((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4);
3969
3970         for (i = 0 ; i < num_of_sob_in_longs ; i += 4)
3971                 WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0);
3972
3973         for (i = 0 ; i < num_of_mon_in_longs ; i += 4)
3974                 WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0);
3975
3976         /* Flush all WREG to prevent race */
3977         i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
3978 }
3979
3980 /*
3981  * goya_debugfs_read32 - read a 32bit value from a given device or a host mapped
3982  *                       address.
3983  *
3984  * @hdev:       pointer to hl_device structure
3985  * @addr:       device or host mapped address
3986  * @val:        returned value
3987  *
3988  * In case of DDR address that is not mapped into the default aperture that
3989  * the DDR bar exposes, the function will configure the iATU so that the DDR
3990  * bar will be positioned at a base address that allows reading from the
3991  * required address. Configuring the iATU during normal operation can
3992  * lead to undefined behavior and therefore, should be done with extreme care
3993  *
3994  */
static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
		/* configuration space - read via the register interface */
		*val = RREG32(addr - CFG_BASE);

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr < SRAM_BASE_ADDR + SRAM_SIZE)) {

		/* SRAM is fully visible through its PCI BAR */
		*val = readl(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
				(addr - SRAM_BASE_ADDR));

	} else if ((addr >= DRAM_PHYS_BASE) &&
			(addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size)) {

		/* align the BAR base so the target address falls inside it */
		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		/* reposition the DDR BAR; see the warning in the header doc */
		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			*val = readl(hdev->pcie_bar[DDR_BAR_ID] +
						(addr - bar_base_addr));

			/* restore the previous BAR base address */
			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		/* U64_MAX from either call means the iATU update failed */
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		/* host memory; direct phys access only valid without IOMMU */
		*val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);

	} else {
		rc = -EFAULT;
	}

	return rc;
}
4036
/*
 * goya_debugfs_write32 - write a 32bit value to a given device or a host mapped
 *                        address.
 *
 * @hdev:       pointer to hl_device structure
 * @addr:       device or host mapped address
 * @val:        value to write
 *
 * In case of DDR address that is not mapped into the default aperture that
 * the DDR bar exposes, the function will configure the iATU so that the DDR
 * bar will be positioned at a base address that allows writing to the
 * required address. Configuring the iATU during normal operation can
 * lead to undefined behavior and therefore, should be done with extreme care
 *
 * Returns 0 on success, -EIO if moving/restoring the DDR BAR failed,
 * -EFAULT if the address belongs to no known region.
 */
static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
		/* Configuration space - write through the register macro */
		WREG32(addr - CFG_BASE, val);

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr < SRAM_BASE_ADDR + SRAM_SIZE)) {

		/* SRAM is fully exposed through its PCI BAR */
		writel(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
					(addr - SRAM_BASE_ADDR));

	} else if ((addr >= DRAM_PHYS_BASE) &&
			(addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size)) {

		/* Align the BAR base down to the aperture size so that
		 * addr falls inside the exposed window
		 */
		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			writel(val, hdev->pcie_bar[DDR_BAR_ID] +
						(addr - bar_base_addr));

			/* Restore the BAR to its previous base address */
			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		/* Covers failure of either the move or the restore above */
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		/* Host physical address - only valid when no IOMMU remaps it */
		*(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;

	} else {
		rc = -EFAULT;
	}

	return rc;
}
4093
/*
 * goya_read_pte - read a page-table entry through the DDR BAR
 *
 * @hdev: pointer to hl_device structure
 * @addr: device DRAM address of the PTE
 *
 * Assumes the DDR BAR currently exposes the window starting at
 * goya->ddr_bar_cur_addr that contains @addr.
 * Returns U64_MAX if a hard reset is pending, since the BAR state
 * can no longer be trusted.
 */
static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
{
	struct goya_device *goya = hdev->asic_specific;

	if (hdev->hard_reset_pending)
		return U64_MAX;

	return readq(hdev->pcie_bar[DDR_BAR_ID] +
			(addr - goya->ddr_bar_cur_addr));
}
4104
/*
 * goya_write_pte - write a page-table entry through the DDR BAR
 *
 * @hdev: pointer to hl_device structure
 * @addr: device DRAM address of the PTE
 * @val: PTE value to write
 *
 * Assumes the DDR BAR currently exposes the window starting at
 * goya->ddr_bar_cur_addr that contains @addr. Silently skips the
 * write while a hard reset is pending.
 */
static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
	struct goya_device *goya = hdev->asic_specific;

	if (hdev->hard_reset_pending)
		return;

	writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
			(addr - goya->ddr_bar_cur_addr));
}
4115
/*
 * _goya_get_event_desc - map an async event ID to a description template
 *
 * @event_type: GOYA_ASYNC_EVENT_ID_* value
 *
 * Returns a static string. For per-engine events the string is a printf
 * template containing a %d placeholder; the caller (goya_get_event_desc)
 * supplies the engine index. Unknown IDs map to "N/A".
 */
static const char *_goya_get_event_desc(u16 event_type)
{
	switch (event_type) {
	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
		return "PCIe_if";
	case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
	case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
		return "TPC%d_ecc";
	case GOYA_ASYNC_EVENT_ID_MME_ECC:
		return "MME_ecc";
	case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
		return "MME_ecc_ext";
	case GOYA_ASYNC_EVENT_ID_MMU_ECC:
		return "MMU_ecc";
	case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
		return "DMA_macro";
	case GOYA_ASYNC_EVENT_ID_DMA_ECC:
		return "DMA_ecc";
	case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
		return "CPU_if_ecc";
	case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
		return "PSOC_mem";
	case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
		return "PSOC_coresight";
	case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
		return "SRAM%d";
	case GOYA_ASYNC_EVENT_ID_GIC500:
		return "GIC500";
	case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
		return "PLL%d";
	case GOYA_ASYNC_EVENT_ID_AXI_ECC:
		return "AXI_ecc";
	case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
		return "L2_ram_ecc";
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
		return "PSOC_gpio_05_sw_reset";
	case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
		return "PSOC_gpio_10_vrhot_icrit";
	case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
		return "PCIe_dec";
	case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
	case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
		return "TPC%d_dec";
	case GOYA_ASYNC_EVENT_ID_MME_WACS:
		return "MME_wacs";
	case GOYA_ASYNC_EVENT_ID_MME_WACSD:
		return "MME_wacsd";
	case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
		return "CPU_axi_splitter";
	case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
		return "PSOC_axi_dec";
	case GOYA_ASYNC_EVENT_ID_PSOC:
		return "PSOC";
	case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
	case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
		return "TPC%d_krn_err";
	case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
		return "TPC%d_cq";
	case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
		return "TPC%d_qm";
	case GOYA_ASYNC_EVENT_ID_MME_QM:
		return "MME_qm";
	case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
		return "MME_cq";
	case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
		return "DMA%d_qm";
	case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
		return "DMA%d_ch";
	case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
	case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
		return "TPC%d_bmon_spmu";
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		return "DMA_bm_ch%d";
	default:
		return "N/A";
	}
}
4217
4218 static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
4219 {
4220         u8 index;
4221
4222         switch (event_type) {
4223         case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4224         case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4225         case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4226         case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4227         case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4228         case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4229         case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4230         case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4231                 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_ECC) / 3;
4232                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4233                 break;
4234         case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4235                 index = event_type - GOYA_ASYNC_EVENT_ID_SRAM0;
4236                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4237                 break;
4238         case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4239                 index = event_type - GOYA_ASYNC_EVENT_ID_PLL0;
4240                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4241                 break;
4242         case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4243         case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4244         case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4245         case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4246         case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4247         case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4248         case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4249         case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4250                 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_DEC) / 3;
4251                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4252                 break;
4253         case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4254         case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4255         case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4256         case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4257         case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4258         case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4259         case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4260         case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4261                 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR) / 10;
4262                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4263                 break;
4264         case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
4265                 index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_CMDQ;
4266                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4267                 break;
4268         case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4269                 index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_QM;
4270                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4271                 break;
4272         case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4273                 index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM;
4274                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4275                 break;
4276         case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4277                 index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
4278                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4279                 break;
4280         case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4281         case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4282         case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4283         case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4284         case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4285         case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4286         case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4287         case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4288                 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU) / 10;
4289                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4290                 break;
4291         case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4292                 index = event_type - GOYA_ASYNC_EVENT_ID_DMA_BM_CH0;
4293                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4294                 break;
4295         default:
4296                 snprintf(desc, size, _goya_get_event_desc(event_type));
4297                 break;
4298         }
4299 }
4300
4301 static void goya_print_razwi_info(struct hl_device *hdev)
4302 {
4303         if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
4304                 dev_err(hdev->dev, "Illegal write to LBW\n");
4305                 WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
4306         }
4307
4308         if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
4309                 dev_err(hdev->dev, "Illegal read from LBW\n");
4310                 WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
4311         }
4312
4313         if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
4314                 dev_err(hdev->dev, "Illegal write to HBW\n");
4315                 WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
4316         }
4317
4318         if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
4319                 dev_err(hdev->dev, "Illegal read from HBW\n");
4320                 WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
4321         }
4322 }
4323
/*
 * goya_print_mmu_error_info - report and clear a latched MMU page fault
 *
 * @hdev: pointer to hl_device structure
 *
 * No-op when the MMU is not initialized. If the page-error capture entry
 * is valid, reconstructs the faulting 50-bit virtual address from the two
 * capture registers, logs it and clears the capture register.
 */
static void goya_print_mmu_error_info(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u64 addr;
	u32 val;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	val = RREG32(mmMMU_PAGE_ERROR_CAPTURE);
	if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		/* High bits [49:32] come from the capture register,
		 * low 32 bits from the companion VA register
		 */
		addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
		addr <<= 32;
		addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);

		dev_err(hdev->dev, "MMU page fault on va 0x%llx\n", addr);

		/* Re-arm the capture logic for the next fault */
		WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
	}
}
4344
4345 static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
4346                                 bool razwi)
4347 {
4348         char desc[20] = "";
4349
4350         goya_get_event_desc(event_type, desc, sizeof(desc));
4351         dev_err(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
4352                 event_type, desc);
4353
4354         if (razwi) {
4355                 goya_print_razwi_info(hdev);
4356                 goya_print_mmu_error_info(hdev);
4357         }
4358 }
4359
/*
 * goya_unmask_irq_arr - ask ArmCP to unmask an array of RAZWI IRQs
 *
 * @hdev: pointer to hl_device structure
 * @irq_arr: array of event IDs to unmask, in CPU endianness
 * @irq_arr_size: size of @irq_arr in bytes
 *
 * Builds a variable-length packet (header followed by an LE32 IRQ array)
 * and sends it to the device CPU.
 * Returns 0 on success, -EINVAL if the packet would exceed the u16 size
 * field, -ENOMEM on allocation failure, or the CPU-message error code.
 */
static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
		size_t irq_arr_size)
{
	struct armcp_unmask_irq_arr_packet *pkt;
	size_t total_pkt_size;
	long result;
	int rc;
	int irq_num_entries, irq_arr_index;
	__le32 *goya_irq_arr;

	total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
			irq_arr_size;

	/* data should be aligned to 8 bytes in order to ArmCP to copy it */
	total_pkt_size = (total_pkt_size + 0x7) & ~0x7;

	/* total_pkt_size is casted to u16 later on */
	if (total_pkt_size > USHRT_MAX) {
		dev_err(hdev->dev, "too many elements in IRQ array\n");
		return -EINVAL;
	}

	pkt = kzalloc(total_pkt_size, GFP_KERNEL);
	if (!pkt)
		return -ENOMEM;

	irq_num_entries = irq_arr_size / sizeof(irq_arr[0]);
	pkt->length = cpu_to_le32(irq_num_entries);

	/* We must perform any necessary endianness conversation on the irq
	 * array being passed to the goya hardware
	 */
	for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs;
			irq_arr_index < irq_num_entries ; irq_arr_index++)
		goya_irq_arr[irq_arr_index] =
				cpu_to_le32(irq_arr[irq_arr_index]);

	pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
						ARMCP_PKT_CTL_OPCODE_SHIFT);

	rc = goya_send_cpu_message(hdev, (u32 *) pkt, total_pkt_size,
			HL_DEVICE_TIMEOUT_USEC, &result);

	if (rc)
		dev_err(hdev->dev, "failed to unmask IRQ array\n");

	kfree(pkt);

	return rc;
}
4410
/*
 * goya_soft_reset_late_init - late initialization after a soft reset
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success, negative errno from the unmask request otherwise.
 */
static int goya_soft_reset_late_init(struct hl_device *hdev)
{
	/*
	 * Unmask all IRQs since some could have been received
	 * during the soft reset
	 */
	return goya_unmask_irq_arr(hdev, goya_all_events,
					sizeof(goya_all_events));
}
4420
4421 static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
4422 {
4423         struct armcp_packet pkt;
4424         long result;
4425         int rc;
4426
4427         memset(&pkt, 0, sizeof(pkt));
4428
4429         pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ <<
4430                                 ARMCP_PKT_CTL_OPCODE_SHIFT);
4431         pkt.value = cpu_to_le64(event_type);
4432
4433         rc = goya_send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
4434                         HL_DEVICE_TIMEOUT_USEC, &result);
4435
4436         if (rc)
4437                 dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
4438
4439         return rc;
4440 }
4441
4442 void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
4443 {
4444         u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
4445         u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
4446                                 >> EQ_CTL_EVENT_TYPE_SHIFT);
4447         struct goya_device *goya = hdev->asic_specific;
4448
4449         goya->events_stat[event_type]++;
4450
4451         switch (event_type) {
4452         case GOYA_ASYNC_EVENT_ID_PCIE_IF:
4453         case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4454         case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4455         case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4456         case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4457         case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4458         case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4459         case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4460         case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4461         case GOYA_ASYNC_EVENT_ID_MME_ECC:
4462         case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
4463         case GOYA_ASYNC_EVENT_ID_MMU_ECC:
4464         case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
4465         case GOYA_ASYNC_EVENT_ID_DMA_ECC:
4466         case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
4467         case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
4468         case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
4469         case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4470         case GOYA_ASYNC_EVENT_ID_GIC500:
4471         case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
4472         case GOYA_ASYNC_EVENT_ID_AXI_ECC:
4473         case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
4474         case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
4475                 goya_print_irq_info(hdev, event_type, false);
4476                 hl_device_reset(hdev, true, false);
4477                 break;
4478
4479         case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4480         case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4481         case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4482         case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4483         case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4484         case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4485         case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4486         case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4487         case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4488         case GOYA_ASYNC_EVENT_ID_MME_WACS:
4489         case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4490         case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4491         case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4492         case GOYA_ASYNC_EVENT_ID_PSOC:
4493         case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4494         case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4495         case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4496         case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4497         case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4498         case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4499         case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4500         case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4501         case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4502         case GOYA_ASYNC_EVENT_ID_MME_QM:
4503         case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4504         case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4505         case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4506                 goya_print_irq_info(hdev, event_type, true);
4507                 goya_unmask_irq(hdev, event_type);
4508                 break;
4509
4510         case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
4511         case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4512         case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4513         case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4514         case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4515         case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4516         case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4517         case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4518         case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4519         case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4520                 goya_print_irq_info(hdev, event_type, false);
4521                 goya_unmask_irq(hdev, event_type);
4522                 break;
4523
4524         default:
4525                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
4526                                 event_type);
4527                 break;
4528         }
4529 }
4530
/*
 * goya_get_events_stat - return the per-event statistics array
 *
 * @hdev: pointer to hl_device structure
 * @size: out parameter, set to the size of the array in bytes
 *
 * Returns a pointer to the live counters (not a copy), indexed by async
 * event ID.
 */
void *goya_get_events_stat(struct hl_device *hdev, u32 *size)
{
	struct goya_device *goya = hdev->asic_specific;

	*size = (u32) sizeof(goya->events_stat);

	return goya->events_stat;
}
4539
/*
 * goya_memset_device_memory - fill a device memory range with a 64-bit value
 *
 * @hdev: pointer to hl_device structure
 * @addr: device address to start filling at
 * @size: number of bytes to fill
 * @val: 64-bit pattern to write
 * @is_dram: true for DRAM destination, false for SRAM
 *
 * Builds one LIN_DMA memset packet per 2GB chunk (the DMA size field is
 * 32-bit) in a kernel command buffer and submits it on QMAN0.
 * Returns 0 on success, -ENOMEM on CB/job allocation failure, or the
 * submission error code.
 *
 * NOTE(review): if size is 0, lin_dma_pkts_cnt is 0 and the do/while
 * decrements it to -1 on the first pass, looping far past the CB -
 * callers must not pass size == 0; confirm and consider a guard.
 */
static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
				u64 val, bool is_dram)
{
	struct packet_lin_dma *lin_dma_pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl;
	struct hl_cb *cb;
	int rc, lin_dma_pkts_cnt;

	/* One packet per 2GB chunk, plus a trailing MSG_PROT packet */
	lin_dma_pkts_cnt = DIV_ROUND_UP_ULL(size, SZ_2G);
	cb_size = lin_dma_pkts_cnt * sizeof(struct packet_lin_dma) +
						sizeof(struct packet_msg_prot);
	cb = hl_cb_kernel_create(hdev, cb_size);
	if (!cb)
		return -ENOMEM;

	lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;

	do {
		memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));

		/* MEMSET mode: src_addr holds the fill pattern, not a
		 * source address
		 */
		ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
				(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
				(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
				(1 << GOYA_PKT_CTL_RB_SHIFT) |
				(1 << GOYA_PKT_CTL_MB_SHIFT));
		ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
				GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
		lin_dma_pkt->ctl = cpu_to_le32(ctl);

		lin_dma_pkt->src_addr = cpu_to_le64(val);
		lin_dma_pkt->dst_addr = cpu_to_le64(addr);
		/* Full 2GB chunks except possibly the last one */
		if (lin_dma_pkts_cnt > 1)
			lin_dma_pkt->tsize = cpu_to_le32(SZ_2G);
		else
			lin_dma_pkt->tsize = cpu_to_le32(size);

		size -= SZ_2G;
		addr += SZ_2G;
		lin_dma_pkt++;
	} while (--lin_dma_pkts_cnt);

	job = hl_cs_allocate_job(hdev, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	job->user_cb->cs_cnt++;
	job->user_cb_size = cb_size;
	job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
	/* Kernel CB needs no parsing - patched CB is the CB itself */
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size;

	hl_debugfs_add_job(hdev, job);

	rc = goya_send_job_on_qman0(hdev, job);

	hl_cb_put(job->patched_cb);

	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	cb->cs_cnt--;

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	return rc;
}
4613
/*
 * goya_context_switch - prepare the device for a new user context
 *
 * @hdev: pointer to hl_device structure
 * @asid: ASID of the incoming context
 *
 * Scrubs the SRAM with a known pattern, restores user-modifiable DMA
 * completion registers to their defaults, reprograms the MMU for the new
 * ASID and clears the sync-manager registers.
 * Returns 0 on success, negative errno if the SRAM scrub failed.
 */
int goya_context_switch(struct hl_device *hdev, u32 asid)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 addr = prop->sram_base_address, sob_addr;
	/* On Palladium emulation, scrub only a small region to save time */
	u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
	u64 val = 0x7777777777777777ull;
	int rc, dma_id;
	u32 channel_off = mmDMA_CH_1_WR_COMP_ADDR_LO -
					mmDMA_CH_0_WR_COMP_ADDR_LO;

	rc = goya_memset_device_memory(hdev, addr, size, val, false);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
		return rc;
	}

	/* we need to reset registers that the user is allowed to change */
	sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1007;
	WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO, lower_32_bits(sob_addr));

	/* Channels 1..N complete on consecutive SOB objects from 1000 */
	for (dma_id = 1 ; dma_id < NUMBER_OF_EXT_HW_QUEUES ; dma_id++) {
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
							(dma_id - 1) * 4;
		WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + channel_off * dma_id,
						lower_32_bits(sob_addr));
	}

	WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);

	goya_mmu_prepare(hdev, asid);

	goya_clear_sm_regs(hdev);

	return 0;
}
4649
/*
 * goya_mmu_clear_pgt_range - zero the MMU page-table area in DRAM
 *
 * @hdev: pointer to hl_device structure
 *
 * Clears the page tables, the DRAM default page and the MMU cache
 * management region in one memset. No-op (returns 0) when the MMU is
 * not initialized.
 */
static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u64 addr = prop->mmu_pgt_addr;
	u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
			MMU_CACHE_MNG_SIZE;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return goya_memset_device_memory(hdev, addr, size, 0, true);
}
4663
/*
 * goya_mmu_set_dram_default_page - fill the DRAM default page with a pattern
 *
 * @hdev: pointer to hl_device structure
 *
 * Writes a recognizable pattern so that stray accesses resolving to the
 * default page are easy to identify. No-op (returns 0) when the MMU is
 * not initialized.
 */
static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
	u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE;
	u64 val = 0x9999999999999999ull;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return goya_memset_device_memory(hdev, addr, size, val, true);
}
4676
/*
 * goya_mmu_add_mappings_for_device_cpu - create MMU mappings for the device CPU
 *
 * @hdev: pointer to hl_device structure
 *
 * Identity-maps the CPU F/W image region in DRAM with 2MB pages, then maps
 * the CPU-accessible host DMA region (one 2MB page if its physical address
 * is 2MB-aligned, otherwise 512 x 4KB pages), and finally points the CPU
 * interface AXI user bits at the kernel ASID.
 * Returns 0 on success; on failure unwinds every mapping created so far.
 * No-op (returns 0) when the MMU is not initialized.
 */
static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	/* Signed so the unwind loops can run down to and including offset 0 */
	s64 off, cpu_off;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
		rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
				prop->dram_base_address + off, PAGE_SIZE_2MB);
		if (rc) {
			dev_err(hdev->dev, "Map failed for address 0x%llx\n",
				prop->dram_base_address + off);
			goto unmap;
		}
	}

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		/* 2MB-aligned: one huge page suffices */
		rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
			hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB);

		if (rc) {
			dev_err(hdev->dev,
				"Map failed for CPU accessible memory\n");
			/* Step back so the unmap loop covers only the
			 * successfully mapped DRAM pages
			 */
			off -= PAGE_SIZE_2MB;
			goto unmap;
		}
	} else {
		/* Unaligned: fall back to 4KB pages over the 2MB region */
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
			rc = hl_mmu_map(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				hdev->cpu_accessible_dma_address + cpu_off,
				PAGE_SIZE_4KB);
			if (rc) {
				dev_err(hdev->dev,
					"Map failed for CPU accessible memory\n");
				/* Exclude the page that just failed */
				cpu_off -= PAGE_SIZE_4KB;
				goto unmap_cpu;
			}
		}
	}

	/* Route device CPU AXI transactions through the kernel ASID */
	goya_mmu_prepare_reg(hdev, mmCPU_IF_ARUSER_OVR, HL_KERNEL_ASID_ID);
	goya_mmu_prepare_reg(hdev, mmCPU_IF_AWUSER_OVR, HL_KERNEL_ASID_ID);
	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0x7FF);
	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0x7FF);

	/* Make sure configuration is flushed to device */
	RREG32(mmCPU_IF_AWUSER_OVR_EN);

	goya->device_cpu_mmu_mappings_done = true;

	return 0;

unmap_cpu:
	for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				PAGE_SIZE_4KB))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
unmap:
	for (; off >= 0 ; off -= PAGE_SIZE_2MB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				prop->dram_base_address + off);

	return rc;
}
4752
/*
 * goya_mmu_remove_device_cpu_mappings - tear down the MMU mappings that were
 * created for the device CPU: the CPU accessible memory region and the F/W
 * image area in DRAM.
 *
 * @hdev: pointer to hl_device structure
 *
 * Must mirror the mapping path exactly: the CPU accessible region was mapped
 * either as a single 2MB page (when its DMA address is 2MB aligned) or as
 * 4KB pages covering 2MB, and the F/W image as 2MB pages in DRAM.
 */
void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	u32 off, cpu_off;

	/* Nothing to undo if the MMU was never brought up */
	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	/* Nothing to undo if the mappings were never created */
	if (!goya->device_cpu_mmu_mappings_done)
		return;

	/* Disable the ARUSER/AWUSER overrides before removing the mappings */
	WREG32(mmCPU_IF_ARUSER_OVR_EN, 0);
	WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		/* Region was mapped as one 2MB page */
		if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
				PAGE_SIZE_2MB))
			dev_warn(hdev->dev,
				"Failed to unmap CPU accessible memory\n");
	} else {
		/* Region was mapped as 4KB pages covering 2MB */
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
			if (hl_mmu_unmap(hdev->kernel_ctx,
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
					PAGE_SIZE_4KB))
				dev_warn_ratelimited(hdev->dev,
					"failed to unmap address 0x%llx\n",
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
	}

	/* Remove the F/W image mappings from DRAM */
	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB))
			dev_warn_ratelimited(hdev->dev,
					"Failed to unmap address 0x%llx\n",
					prop->dram_base_address + off);

	goya->device_cpu_mmu_mappings_done = false;
}
4792
4793 static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
4794 {
4795         struct goya_device *goya = hdev->asic_specific;
4796         int i;
4797
4798         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4799                 return;
4800
4801         if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
4802                 WARN(1, "asid %u is too big\n", asid);
4803                 return;
4804         }
4805
4806         /* zero the MMBP and ASID bits and then set the ASID */
4807         for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
4808                 goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
4809 }
4810
/*
 * goya_mmu_invalidate_cache - invalidate the entire MMU STLB cache
 *
 * @hdev: pointer to hl_device structure
 * @is_hard: request a full (L0 & L1) invalidation; soft requests are ignored
 *           because Goya has no use for an L1-only invalidation
 *
 * Kicks a full-cache invalidation and polls until the H/W clears the start
 * bit, or until the timeout expires.
 */
static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	/* no need in L1 only invalidation in Goya */
	if (!is_hard)
		return;

	/* Palladium emulation is much slower than real H/W */
	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	mutex_lock(&hdev->mmu_cache_lock);

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_ALL_START, 1);

	/* Wait for the H/W to clear the start bit; poll every 1000 usec */
	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_ALL_START,
		status,
		!status,
		1000,
		timeout_usec);

	mutex_unlock(&hdev->mmu_cache_lock);

	if (rc)
		dev_notice_ratelimited(hdev->dev,
			"Timeout when waiting for MMU cache invalidation\n");
}
4848
/*
 * goya_mmu_invalidate_cache_range - invalidate the MMU cache for a VA range
 *
 * @hdev: pointer to hl_device structure
 * @is_hard: request a full invalidation; soft requests are ignored because
 *           Goya has no use for an L1-only invalidation
 * @asid: ASID the range belongs to (currently unused - see TODO below)
 * @va: base device virtual address of the range (currently unused)
 * @size: size of the range in bytes (currently unused)
 *
 * Advances the invalidation producer index and polls the consumer index
 * until the H/W catches up, or until the timeout expires.
 */
static void goya_mmu_invalidate_cache_range(struct hl_device *hdev,
		bool is_hard, u32 asid, u64 va, u64 size)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 status, timeout_usec, inv_data, pi;
	int rc;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return;

	/* no need in L1 only invalidation in Goya */
	if (!is_hard)
		return;

	/* Palladium emulation is much slower than real H/W */
	if (hdev->pldm)
		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	mutex_lock(&hdev->mmu_cache_lock);

	/*
	 * TODO: currently invalidate entire L0 & L1 as in regular hard
	 * invalidation. Need to apply invalidation of specific cache lines with
	 * mask of ASID & VA & size.
	 * Note that L1 with be flushed entirely in any case.
	 */

	/* L0 & L1 invalidation */
	inv_data = RREG32(mmSTLB_CACHE_INV);
	/* PI is 8 bit */
	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
	/* Keep the index mask bits, write back the incremented producer index */
	WREG32(mmSTLB_CACHE_INV,
			(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);

	/* Wait until the consumer index reaches the new producer index */
	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_CONSUMER_INDEX,
		status,
		status == pi,
		1000,
		timeout_usec);

	mutex_unlock(&hdev->mmu_cache_lock);

	if (rc)
		dev_notice_ratelimited(hdev->dev,
			"Timeout when waiting for MMU cache invalidation\n");
}
4898
4899 int goya_send_heartbeat(struct hl_device *hdev)
4900 {
4901         struct goya_device *goya = hdev->asic_specific;
4902
4903         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
4904                 return 0;
4905
4906         return hl_fw_send_heartbeat(hdev);
4907 }
4908
4909 int goya_armcp_info_get(struct hl_device *hdev)
4910 {
4911         struct goya_device *goya = hdev->asic_specific;
4912         struct asic_fixed_properties *prop = &hdev->asic_prop;
4913         u64 dram_size;
4914         int rc;
4915
4916         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
4917                 return 0;
4918
4919         rc = hl_fw_armcp_info_get(hdev);
4920         if (rc)
4921                 return rc;
4922
4923         dram_size = le64_to_cpu(prop->armcp_info.dram_size);
4924         if (dram_size) {
4925                 if ((!is_power_of_2(dram_size)) ||
4926                                 (dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
4927                         dev_err(hdev->dev,
4928                                 "F/W reported invalid DRAM size %llu. Trying to use default size\n",
4929                                 dram_size);
4930                         dram_size = DRAM_PHYS_DEFAULT_SIZE;
4931                 }
4932
4933                 prop->dram_size = dram_size;
4934                 prop->dram_end_address = prop->dram_base_address + dram_size;
4935         }
4936
4937         return 0;
4938 }
4939
/*
 * goya_is_device_idle - check whether all device engines (DMA, TPC, MME)
 * are idle
 *
 * @hdev: pointer to hl_device structure
 * @mask: if not NULL, a bit is set for every engine that is NOT idle
 * @s: if not NULL, a human-readable status table is printed into it
 *
 * Return: true if every engine is idle, false otherwise
 */
static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
				struct seq_file *s)
{
	const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
	const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
	u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
		mme_arch_sts;
	bool is_idle = true, is_eng_idle;
	u64 offset;
	int i;

	if (s)
		seq_puts(s, "\nDMA  is_idle  QM_GLBL_STS0  DMA_CORE_STS0\n"
				"---  -------  ------------  -------------\n");

	/* Register stride between consecutive DMA channels */
	offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;

	for (i = 0 ; i < DMA_MAX_NUM ; i++) {
		qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
		dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
		/* A DMA engine is idle only if both its QM and core are idle */
		is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= !is_eng_idle << (GOYA_ENGINE_ID_DMA_0 + i);
		if (s)
			seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, dma_core_sts0);
	}

	if (s)
		seq_puts(s,
			"\nTPC  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  CFG_STATUS\n"
			"---  -------  ------------  --------------  ----------\n");

	/* Register stride between consecutive TPC engines */
	offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;

	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
		cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
		/* A TPC is idle only if its QM, CMDQ and core are all idle */
		is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
				IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask)
			*mask |= !is_eng_idle << (GOYA_ENGINE_ID_TPC_0 + i);
		if (s)
			seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
	}

	if (s)
		seq_puts(s,
			"\nMME  is_idle  QM_GLBL_STS0  CMDQ_GLBL_STS0  ARCH_STATUS\n"
			"---  -------  ------------  --------------  -----------\n");

	/* Goya has a single MME engine */
	qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
	cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
	mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
	is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
			IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
			IS_MME_IDLE(mme_arch_sts);
	is_idle &= is_eng_idle;

	if (mask)
		*mask |= !is_eng_idle << GOYA_ENGINE_ID_MME_0;
	if (s) {
		seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				cmdq_glbl_sts0, mme_arch_sts);
		seq_puts(s, "\n");
	}

	return is_idle;
}
5017
5018 static void goya_hw_queues_lock(struct hl_device *hdev)
5019 {
5020         struct goya_device *goya = hdev->asic_specific;
5021
5022         spin_lock(&goya->hw_queues_lock);
5023 }
5024
5025 static void goya_hw_queues_unlock(struct hl_device *hdev)
5026 {
5027         struct goya_device *goya = hdev->asic_specific;
5028
5029         spin_unlock(&goya->hw_queues_lock);
5030 }
5031
5032 static u32 goya_get_pci_id(struct hl_device *hdev)
5033 {
5034         return hdev->pdev->device;
5035 }
5036
5037 static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
5038                                 size_t max_size)
5039 {
5040         struct goya_device *goya = hdev->asic_specific;
5041
5042         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5043                 return 0;
5044
5045         return hl_fw_get_eeprom_data(hdev, data, max_size);
5046 }
5047
5048 static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
5049 {
5050         return RREG32(mmPSOC_GLOBAL_CONF_APP_STATUS);
5051 }
5052
/*
 * goya_funcs - the GOYA implementation of the ASIC-specific dispatch table
 * used by the common habanalabs driver code. Entries are grouped below by
 * functional area.
 */
static const struct hl_asic_funcs goya_funcs = {
	/* Device bring-up / tear-down */
	.early_init = goya_early_init,
	.early_fini = goya_early_fini,
	.late_init = goya_late_init,
	.late_fini = goya_late_fini,
	.sw_init = goya_sw_init,
	.sw_fini = goya_sw_fini,
	.hw_init = goya_hw_init,
	.hw_fini = goya_hw_fini,
	.halt_engines = goya_halt_engines,
	.suspend = goya_suspend,
	.resume = goya_resume,
	/* Queues and command buffers */
	.cb_mmap = goya_cb_mmap,
	.ring_doorbell = goya_ring_doorbell,
	.pqe_write = goya_pqe_write,
	/* DMA memory management */
	.asic_dma_alloc_coherent = goya_dma_alloc_coherent,
	.asic_dma_free_coherent = goya_dma_free_coherent,
	.get_int_queue_base = goya_get_int_queue_base,
	.test_queues = goya_test_queues,
	.asic_dma_pool_zalloc = goya_dma_pool_zalloc,
	.asic_dma_pool_free = goya_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
	.hl_dma_unmap_sg = goya_dma_unmap_sg,
	/* Command submission parsing */
	.cs_parser = goya_cs_parser,
	.asic_dma_map_sg = goya_dma_map_sg,
	.get_dma_desc_list_size = goya_get_dma_desc_list_size,
	.add_end_of_cb_packets = goya_add_end_of_cb_packets,
	.update_eq_ci = goya_update_eq_ci,
	.context_switch = goya_context_switch,
	.restore_phase_topology = goya_restore_phase_topology,
	/* Debugfs and sysfs support */
	.debugfs_read32 = goya_debugfs_read32,
	.debugfs_write32 = goya_debugfs_write32,
	.add_device_attr = goya_add_device_attr,
	.handle_eqe = goya_handle_eqe,
	.set_pll_profile = goya_set_pll_profile,
	.get_events_stat = goya_get_events_stat,
	/* MMU */
	.read_pte = goya_read_pte,
	.write_pte = goya_write_pte,
	.mmu_invalidate_cache = goya_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
	/* F/W communication and device status */
	.send_heartbeat = goya_send_heartbeat,
	.debug_coresight = goya_debug_coresight,
	.is_device_idle = goya_is_device_idle,
	.soft_reset_late_init = goya_soft_reset_late_init,
	.hw_queues_lock = goya_hw_queues_lock,
	.hw_queues_unlock = goya_hw_queues_unlock,
	.get_pci_id = goya_get_pci_id,
	.get_eeprom_data = goya_get_eeprom_data,
	.send_cpu_message = goya_send_cpu_message,
	.get_hw_state = goya_get_hw_state,
	/* PCI configuration */
	.pci_bars_map = goya_pci_bars_map,
	.set_dram_bar_base = goya_set_ddr_bar_base,
	.init_iatu = goya_init_iatu,
	/* Register access primitives */
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = goya_halt_coresight
};
5111
5112 /*
5113  * goya_set_asic_funcs - set Goya function pointers
5114  *
5115  * @*hdev: pointer to hl_device structure
5116  *
5117  */
5118 void goya_set_asic_funcs(struct hl_device *hdev)
5119 {
5120         hdev->asic_funcs = &goya_funcs;
5121 }