habanalabs: add MMU DRAM default page mapping
drivers/misc/habanalabs/goya/goya.c
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "goyaP.h"
#include "include/hw_ip/mmu/mmu_general.h"
#include "include/hw_ip/mmu/mmu_v1_0.h"
#include "include/goya/asic_reg/goya_masks.h"

#include <linux/pci.h>
#include <linux/genalloc.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/io-64-nonatomic-hi-lo.h>

/*
 * GOYA security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers (when MMU is enabled, DMA RR does NOT protect host)
 *        - MMU
 *
 * 2. DRAM is protected by:
 *        - Range registers (protect the first 512MB)
 *        - MMU (isolation between users)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * When MMU is disabled:
 *
 * QMAN DMA: PQ, CQ, CP, DMA are secured.
 * PQ, CB and the data are on the host.
 *
 * QMAN TPC/MME:
 * PQ, CQ and CP are not secured.
 * PQ, CB and the data are on the SRAM/DRAM.
 *
 * Since QMAN DMA is secured, KMD parses the DMA CB:
 *     - KMD checks the DMA pointer
 *     - WREG, MSG_PROT are not allowed.
 *     - MSG_LONG/SHORT are allowed.
 *
 * A read/write transaction by the QMAN to a protected area will succeed if
 * and only if the QMAN's CP is secured and MSG_PROT is used.
 *
 *
 * When MMU is enabled:
 *
 * QMAN DMA: PQ, CQ and CP are secured.
 * MMU is set to bypass on the Secure props register of the QMAN.
 * The reasons we don't enable MMU for PQ, CQ and CP are:
 *     - PQ entry is in kernel address space and KMD doesn't map it.
 *     - CP writes to MSIX register and to kernel address space (completion
 *       queue).
 *
 * DMA is not secured but because CP is secured, KMD still needs to parse the
 * CB, but doesn't need to check the DMA addresses.
 *
 * For QMAN DMA 0, DMA is also secured because only KMD uses this DMA and KMD
 * doesn't map memory in MMU.
 *
 * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled
 * mode).
 *
 * DMA RR does NOT protect host because DMA is not secured.
 *
 */

#define GOYA_MMU_REGS_NUM               61

#define GOYA_DMA_POOL_BLK_SIZE          0x100           /* 256 bytes */

#define GOYA_RESET_TIMEOUT_MSEC         500             /* 500ms */
#define GOYA_PLDM_RESET_TIMEOUT_MSEC    20000           /* 20s */
#define GOYA_RESET_WAIT_MSEC            1               /* 1ms */
#define GOYA_CPU_RESET_WAIT_MSEC        100             /* 100ms */
#define GOYA_PLDM_RESET_WAIT_MSEC       1000            /* 1s */
#define GOYA_CPU_TIMEOUT_USEC           10000000        /* 10s */
#define GOYA_TEST_QUEUE_WAIT_USEC       100000          /* 100ms */
#define GOYA_PLDM_MMU_TIMEOUT_USEC      (MMU_CONFIG_TIMEOUT_USEC * 100)

#define GOYA_QMAN0_FENCE_VAL            0xD169B243

#define GOYA_MAX_INITIATORS             20

#define GOYA_MAX_STRING_LEN             20

#define GOYA_CB_POOL_CB_CNT             512
#define GOYA_CB_POOL_CB_SIZE            0x20000         /* 128KB */

static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
                "goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
                "goya cq 4", "goya cpu eq"
};

static u16 goya_packet_sizes[MAX_PACKET_ID] = {
        [PACKET_WREG_32]        = sizeof(struct packet_wreg32),
        [PACKET_WREG_BULK]      = sizeof(struct packet_wreg_bulk),
        [PACKET_MSG_LONG]       = sizeof(struct packet_msg_long),
        [PACKET_MSG_SHORT]      = sizeof(struct packet_msg_short),
        [PACKET_CP_DMA]         = sizeof(struct packet_cp_dma),
        [PACKET_MSG_PROT]       = sizeof(struct packet_msg_prot),
        [PACKET_FENCE]          = sizeof(struct packet_fence),
        [PACKET_LIN_DMA]        = sizeof(struct packet_lin_dma),
        [PACKET_NOP]            = sizeof(struct packet_nop),
        [PACKET_STOP]           = sizeof(struct packet_stop)
};

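/*
 * Registers that are programmed with the context's ASID when a new
 * context is configured; see goya_mmu_prepare().
 */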
static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
        mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
        mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
        mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
        mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
        mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
        mmTPC0_QM_GLBL_SECURE_PROPS,
        mmTPC0_QM_GLBL_NON_SECURE_PROPS,
        mmTPC0_CMDQ_GLBL_SECURE_PROPS,
        mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC0_CFG_ARUSER,
        mmTPC0_CFG_AWUSER,
        mmTPC1_QM_GLBL_SECURE_PROPS,
        mmTPC1_QM_GLBL_NON_SECURE_PROPS,
        mmTPC1_CMDQ_GLBL_SECURE_PROPS,
        mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC1_CFG_ARUSER,
        mmTPC1_CFG_AWUSER,
        mmTPC2_QM_GLBL_SECURE_PROPS,
        mmTPC2_QM_GLBL_NON_SECURE_PROPS,
        mmTPC2_CMDQ_GLBL_SECURE_PROPS,
        mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC2_CFG_ARUSER,
        mmTPC2_CFG_AWUSER,
        mmTPC3_QM_GLBL_SECURE_PROPS,
        mmTPC3_QM_GLBL_NON_SECURE_PROPS,
        mmTPC3_CMDQ_GLBL_SECURE_PROPS,
        mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC3_CFG_ARUSER,
        mmTPC3_CFG_AWUSER,
        mmTPC4_QM_GLBL_SECURE_PROPS,
        mmTPC4_QM_GLBL_NON_SECURE_PROPS,
        mmTPC4_CMDQ_GLBL_SECURE_PROPS,
        mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC4_CFG_ARUSER,
        mmTPC4_CFG_AWUSER,
        mmTPC5_QM_GLBL_SECURE_PROPS,
        mmTPC5_QM_GLBL_NON_SECURE_PROPS,
        mmTPC5_CMDQ_GLBL_SECURE_PROPS,
        mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC5_CFG_ARUSER,
        mmTPC5_CFG_AWUSER,
        mmTPC6_QM_GLBL_SECURE_PROPS,
        mmTPC6_QM_GLBL_NON_SECURE_PROPS,
        mmTPC6_CMDQ_GLBL_SECURE_PROPS,
        mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC6_CFG_ARUSER,
        mmTPC6_CFG_AWUSER,
        mmTPC7_QM_GLBL_SECURE_PROPS,
        mmTPC7_QM_GLBL_NON_SECURE_PROPS,
        mmTPC7_CMDQ_GLBL_SECURE_PROPS,
        mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
        mmTPC7_CFG_ARUSER,
        mmTPC7_CFG_AWUSER,
        mmMME_QM_GLBL_SECURE_PROPS,
        mmMME_QM_GLBL_NON_SECURE_PROPS,
        mmMME_CMDQ_GLBL_SECURE_PROPS,
        mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
        mmMME_SBA_CONTROL_DATA,
        mmMME_SBB_CONTROL_DATA,
        mmMME_SBC_CONTROL_DATA,
        mmMME_WBC_CONTROL_DATA
};

#define GOYA_ASYC_EVENT_GROUP_NON_FATAL_SIZE 121

static u32 goya_non_fatal_events[GOYA_ASYC_EVENT_GROUP_NON_FATAL_SIZE] = {
        GOYA_ASYNC_EVENT_ID_PCIE_IF,
        GOYA_ASYNC_EVENT_ID_TPC0_ECC,
        GOYA_ASYNC_EVENT_ID_TPC1_ECC,
        GOYA_ASYNC_EVENT_ID_TPC2_ECC,
        GOYA_ASYNC_EVENT_ID_TPC3_ECC,
        GOYA_ASYNC_EVENT_ID_TPC4_ECC,
        GOYA_ASYNC_EVENT_ID_TPC5_ECC,
        GOYA_ASYNC_EVENT_ID_TPC6_ECC,
        GOYA_ASYNC_EVENT_ID_TPC7_ECC,
        GOYA_ASYNC_EVENT_ID_MME_ECC,
        GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
        GOYA_ASYNC_EVENT_ID_MMU_ECC,
        GOYA_ASYNC_EVENT_ID_DMA_MACRO,
        GOYA_ASYNC_EVENT_ID_DMA_ECC,
        GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
        GOYA_ASYNC_EVENT_ID_PSOC_MEM,
        GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
        GOYA_ASYNC_EVENT_ID_SRAM0,
        GOYA_ASYNC_EVENT_ID_SRAM1,
        GOYA_ASYNC_EVENT_ID_SRAM2,
        GOYA_ASYNC_EVENT_ID_SRAM3,
        GOYA_ASYNC_EVENT_ID_SRAM4,
        GOYA_ASYNC_EVENT_ID_SRAM5,
        GOYA_ASYNC_EVENT_ID_SRAM6,
        GOYA_ASYNC_EVENT_ID_SRAM7,
        GOYA_ASYNC_EVENT_ID_SRAM8,
        GOYA_ASYNC_EVENT_ID_SRAM9,
        GOYA_ASYNC_EVENT_ID_SRAM10,
        GOYA_ASYNC_EVENT_ID_SRAM11,
        GOYA_ASYNC_EVENT_ID_SRAM12,
        GOYA_ASYNC_EVENT_ID_SRAM13,
        GOYA_ASYNC_EVENT_ID_SRAM14,
        GOYA_ASYNC_EVENT_ID_SRAM15,
        GOYA_ASYNC_EVENT_ID_SRAM16,
        GOYA_ASYNC_EVENT_ID_SRAM17,
        GOYA_ASYNC_EVENT_ID_SRAM18,
        GOYA_ASYNC_EVENT_ID_SRAM19,
        GOYA_ASYNC_EVENT_ID_SRAM20,
        GOYA_ASYNC_EVENT_ID_SRAM21,
        GOYA_ASYNC_EVENT_ID_SRAM22,
        GOYA_ASYNC_EVENT_ID_SRAM23,
        GOYA_ASYNC_EVENT_ID_SRAM24,
        GOYA_ASYNC_EVENT_ID_SRAM25,
        GOYA_ASYNC_EVENT_ID_SRAM26,
        GOYA_ASYNC_EVENT_ID_SRAM27,
        GOYA_ASYNC_EVENT_ID_SRAM28,
        GOYA_ASYNC_EVENT_ID_SRAM29,
        GOYA_ASYNC_EVENT_ID_GIC500,
        GOYA_ASYNC_EVENT_ID_PLL0,
        GOYA_ASYNC_EVENT_ID_PLL1,
        GOYA_ASYNC_EVENT_ID_PLL3,
        GOYA_ASYNC_EVENT_ID_PLL4,
        GOYA_ASYNC_EVENT_ID_PLL5,
        GOYA_ASYNC_EVENT_ID_PLL6,
        GOYA_ASYNC_EVENT_ID_AXI_ECC,
        GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
        GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
        GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
        GOYA_ASYNC_EVENT_ID_PCIE_DEC,
        GOYA_ASYNC_EVENT_ID_TPC0_DEC,
        GOYA_ASYNC_EVENT_ID_TPC1_DEC,
        GOYA_ASYNC_EVENT_ID_TPC2_DEC,
        GOYA_ASYNC_EVENT_ID_TPC3_DEC,
        GOYA_ASYNC_EVENT_ID_TPC4_DEC,
        GOYA_ASYNC_EVENT_ID_TPC5_DEC,
        GOYA_ASYNC_EVENT_ID_TPC6_DEC,
        GOYA_ASYNC_EVENT_ID_TPC7_DEC,
        GOYA_ASYNC_EVENT_ID_MME_WACS,
        GOYA_ASYNC_EVENT_ID_MME_WACSD,
        GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
        GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
        GOYA_ASYNC_EVENT_ID_PSOC,
        GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
        GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
        GOYA_ASYNC_EVENT_ID_TPC0_QM,
        GOYA_ASYNC_EVENT_ID_TPC1_QM,
        GOYA_ASYNC_EVENT_ID_TPC2_QM,
        GOYA_ASYNC_EVENT_ID_TPC3_QM,
        GOYA_ASYNC_EVENT_ID_TPC4_QM,
        GOYA_ASYNC_EVENT_ID_TPC5_QM,
        GOYA_ASYNC_EVENT_ID_TPC6_QM,
        GOYA_ASYNC_EVENT_ID_TPC7_QM,
        GOYA_ASYNC_EVENT_ID_MME_QM,
        GOYA_ASYNC_EVENT_ID_MME_CMDQ,
        GOYA_ASYNC_EVENT_ID_DMA0_QM,
        GOYA_ASYNC_EVENT_ID_DMA1_QM,
        GOYA_ASYNC_EVENT_ID_DMA2_QM,
        GOYA_ASYNC_EVENT_ID_DMA3_QM,
        GOYA_ASYNC_EVENT_ID_DMA4_QM,
        GOYA_ASYNC_EVENT_ID_DMA0_CH,
        GOYA_ASYNC_EVENT_ID_DMA1_CH,
        GOYA_ASYNC_EVENT_ID_DMA2_CH,
        GOYA_ASYNC_EVENT_ID_DMA3_CH,
        GOYA_ASYNC_EVENT_ID_DMA4_CH,
        GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
        GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
        GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
        GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
        GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
        GOYA_ASYNC_EVENT_ID_DMA_BM_CH4
};

static int goya_armcp_info_get(struct hl_device *hdev);
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
                                        u64 phys_addr);

static void goya_get_fixed_properties(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        int i;

        for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
                prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
                prop->hw_queues_props[i].kmd_only = 0;
        }

        for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
                prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
                prop->hw_queues_props[i].kmd_only = 1;
        }

        for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
                        NUMBER_OF_INT_HW_QUEUES; i++) {
                prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
                prop->hw_queues_props[i].kmd_only = 0;
        }

        for (; i < HL_MAX_QUEUES; i++)
                prop->hw_queues_props[i].type = QUEUE_TYPE_NA;

        prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;

        prop->dram_base_address = DRAM_PHYS_BASE;
        prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
        prop->dram_end_address = prop->dram_base_address + prop->dram_size;
        prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

        prop->sram_base_address = SRAM_BASE_ADDR;
        prop->sram_size = SRAM_SIZE;
        prop->sram_end_address = prop->sram_base_address + prop->sram_size;
        prop->sram_user_base_address = prop->sram_base_address +
                                                SRAM_USER_BASE_OFFSET;

        prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
        prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR;
        if (hdev->pldm)
                prop->mmu_pgt_size = 0x800000; /* 8MB */
        else
                prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
        prop->mmu_pte_size = HL_PTE_SIZE;
        prop->mmu_hop_table_size = HOP_TABLE_SIZE;
        prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
        prop->dram_page_size = PAGE_SIZE_2MB;

        prop->host_phys_base_address = HOST_PHYS_BASE;
        prop->va_space_host_start_address = VA_HOST_SPACE_START;
        prop->va_space_host_end_address = VA_HOST_SPACE_END;
        prop->va_space_dram_start_address = VA_DDR_SPACE_START;
        prop->va_space_dram_end_address = VA_DDR_SPACE_END;
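        /*
         * The DRAM default page mapping covers the entire DRAM virtual
         * address space, so any DRAM address a user hasn't explicitly
         * mapped falls back to the default page.
         */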
        prop->dram_size_for_default_page_mapping =
                        prop->va_space_dram_end_address;
        prop->cfg_size = CFG_SIZE;
        prop->max_asid = MAX_ASID;
        prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
        prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
        prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
        prop->max_power_default = MAX_POWER_DEFAULT;
        prop->tpc_enabled_mask = TPC_ENABLED_MASK;

        prop->high_pll = PLL_HIGH_DEFAULT;
}

int goya_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
{
        struct armcp_packet pkt;

        memset(&pkt, 0, sizeof(pkt));

        pkt.ctl = opcode << ARMCP_PKT_CTL_OPCODE_SHIFT;

        return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
                        sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL);
}

/*
 * goya_pci_bars_map - Map PCI BARS of Goya device
 *
 * @hdev: pointer to hl_device structure
 *
 * Request PCI regions and map them to kernel virtual addresses.
 * Returns 0 on success
 *
 */
static int goya_pci_bars_map(struct hl_device *hdev)
{
        struct pci_dev *pdev = hdev->pdev;
        int rc;

        rc = pci_request_regions(pdev, HL_NAME);
        if (rc) {
                dev_err(hdev->dev, "Cannot obtain PCI resources\n");
                return rc;
        }

        hdev->pcie_bar[SRAM_CFG_BAR_ID] =
                        pci_ioremap_bar(pdev, SRAM_CFG_BAR_ID);
        if (!hdev->pcie_bar[SRAM_CFG_BAR_ID]) {
                dev_err(hdev->dev, "pci_ioremap_bar failed for CFG\n");
                rc = -ENODEV;
                goto err_release_regions;
        }

        hdev->pcie_bar[MSIX_BAR_ID] = pci_ioremap_bar(pdev, MSIX_BAR_ID);
        if (!hdev->pcie_bar[MSIX_BAR_ID]) {
                dev_err(hdev->dev, "pci_ioremap_bar failed for MSIX\n");
                rc = -ENODEV;
                goto err_unmap_sram_cfg;
        }

        hdev->pcie_bar[DDR_BAR_ID] = pci_ioremap_wc_bar(pdev, DDR_BAR_ID);
        if (!hdev->pcie_bar[DDR_BAR_ID]) {
                dev_err(hdev->dev, "pci_ioremap_bar failed for DDR\n");
                rc = -ENODEV;
                goto err_unmap_msix;
        }

        hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
                                (CFG_BASE - SRAM_BASE_ADDR);

        return 0;

err_unmap_msix:
        iounmap(hdev->pcie_bar[MSIX_BAR_ID]);
err_unmap_sram_cfg:
        iounmap(hdev->pcie_bar[SRAM_CFG_BAR_ID]);
err_release_regions:
        pci_release_regions(pdev);

        return rc;
}

/*
 * goya_pci_bars_unmap - Unmap PCI BARS of Goya device
 *
 * @hdev: pointer to hl_device structure
 *
 * Release all PCI BARS and unmap their virtual addresses
 *
 */
static void goya_pci_bars_unmap(struct hl_device *hdev)
{
        struct pci_dev *pdev = hdev->pdev;

        iounmap(hdev->pcie_bar[DDR_BAR_ID]);
        iounmap(hdev->pcie_bar[MSIX_BAR_ID]);
        iounmap(hdev->pcie_bar[SRAM_CFG_BAR_ID]);
        pci_release_regions(pdev);
}

/*
 * goya_elbi_write - Write through the ELBI interface
 *
 * @hdev: pointer to hl_device structure
 *
 * return 0 on success, negative errno on failure
 *
 */
static int goya_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
{
        struct pci_dev *pdev = hdev->pdev;
        ktime_t timeout;
        u32 val;

        /* Clear previous status */
        pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0);

        pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr);
        pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data);
        pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL,
                                PCI_CONFIG_ELBI_CTRL_WRITE);

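        /* Poll the ELBI status until the write completes or 10ms elapses */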
        timeout = ktime_add_ms(ktime_get(), 10);
        for (;;) {
                pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val);
                if (val & PCI_CONFIG_ELBI_STS_MASK)
                        break;
                if (ktime_compare(ktime_get(), timeout) > 0) {
                        pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS,
                                                &val);
                        break;
                }
                usleep_range(300, 500);
        }

        if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE)
                return 0;

        if (val & PCI_CONFIG_ELBI_STS_ERR) {
                dev_err(hdev->dev, "Error writing to ELBI\n");
                return -EIO;
        }

        if (!(val & PCI_CONFIG_ELBI_STS_MASK)) {
                dev_err(hdev->dev, "ELBI write didn't finish in time\n");
                return -EIO;
        }

        dev_err(hdev->dev, "ELBI write has undefined bits in status\n");
        return -EIO;
}

/*
 * goya_iatu_write - iatu write routine
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_iatu_write(struct hl_device *hdev, u32 addr, u32 data)
{
        u32 dbi_offset;
        int rc;

        dbi_offset = addr & 0xFFF;

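        /*
         * Shift the DBI window to the iATU registers page and write the
         * register through it. Callers restore the window to its default
         * location when they are done.
         */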
        rc = goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0x00300000);
        rc |= goya_elbi_write(hdev, mmPCIE_DBI_BASE + dbi_offset, data);

        if (rc)
                return -EIO;

        return 0;
}

static void goya_reset_link_through_bridge(struct hl_device *hdev)
{
        struct pci_dev *pdev = hdev->pdev;
        struct pci_dev *parent_port;
        u16 val;

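        /* Issue a secondary bus reset by toggling the bus-reset bit in the parent bridge */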
        parent_port = pdev->bus->self;
        pci_read_config_word(parent_port, PCI_BRIDGE_CONTROL, &val);
        val |= PCI_BRIDGE_CTL_BUS_RESET;
        pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
        ssleep(1);

        val &= ~(PCI_BRIDGE_CTL_BUS_RESET);
        pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
        ssleep(3);
}

/*
 * goya_set_ddr_bar_base - set DDR bar to map specific device address
 *
 * @hdev: pointer to hl_device structure
 * @addr: address in DDR. Must be aligned to DDR bar size
 *
 * This function configures the iATU so that the DDR bar will start at the
 * specified addr.
 *
 */
static int goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
{
        struct goya_device *goya = hdev->asic_specific;
        int rc;

        if ((goya) && (goya->ddr_bar_cur_addr == addr))
                return 0;

        /* Inbound Region 1 - Bar 4 - Point to DDR */
        rc = goya_iatu_write(hdev, 0x314, lower_32_bits(addr));
        rc |= goya_iatu_write(hdev, 0x318, upper_32_bits(addr));
        rc |= goya_iatu_write(hdev, 0x300, 0);
        /* Enable + Bar match + match enable + Bar 4 */
        rc |= goya_iatu_write(hdev, 0x304, 0xC0080400);

        /* Return the DBI window to the default location */
        rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0);
        rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI_32, 0);

        if (rc) {
                dev_err(hdev->dev, "failed to map DDR bar to 0x%08llx\n", addr);
                return -EIO;
        }

        if (goya)
                goya->ddr_bar_cur_addr = addr;

        return 0;
}

/*
 * goya_init_iatu - Initialize the iATU unit inside the PCI controller
 *
 * @hdev: pointer to hl_device structure
 *
 * This is needed in case the firmware doesn't initialize the iATU
 *
 */
static int goya_init_iatu(struct hl_device *hdev)
{
        int rc;

        /* Inbound Region 0 - Bar 0 - Point to SRAM_BASE_ADDR */
        rc  = goya_iatu_write(hdev, 0x114, lower_32_bits(SRAM_BASE_ADDR));
        rc |= goya_iatu_write(hdev, 0x118, upper_32_bits(SRAM_BASE_ADDR));
        rc |= goya_iatu_write(hdev, 0x100, 0);
        /* Enable + Bar match + match enable */
        rc |= goya_iatu_write(hdev, 0x104, 0xC0080000);

        /* Inbound Region 1 - Bar 4 - Point to DDR */
        rc |= goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);

        /* Outbound Region 0 - Point to Host */
        rc |= goya_iatu_write(hdev, 0x008, lower_32_bits(HOST_PHYS_BASE));
        rc |= goya_iatu_write(hdev, 0x00C, upper_32_bits(HOST_PHYS_BASE));
        rc |= goya_iatu_write(hdev, 0x010,
                lower_32_bits(HOST_PHYS_BASE + HOST_PHYS_SIZE - 1));
        rc |= goya_iatu_write(hdev, 0x014, 0);
        rc |= goya_iatu_write(hdev, 0x018, 0);
        rc |= goya_iatu_write(hdev, 0x020,
                upper_32_bits(HOST_PHYS_BASE + HOST_PHYS_SIZE - 1));
        /* Increase region size */
        rc |= goya_iatu_write(hdev, 0x000, 0x00002000);
        /* Enable */
        rc |= goya_iatu_write(hdev, 0x004, 0x80000000);

        /* Return the DBI window to the default location */
        rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0);
        rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI_32, 0);

        if (rc)
                return -EIO;

        return 0;
}

/*
 * goya_early_init - GOYA early initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Verify PCI bars
 * Set DMA masks
 * PCI controller initialization
 * Map PCI bars
 *
 */
static int goya_early_init(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct pci_dev *pdev = hdev->pdev;
        u32 val;
        int rc;

        goya_get_fixed_properties(hdev);

        /* Check BAR sizes */
        if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
                dev_err(hdev->dev,
                        "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
                        SRAM_CFG_BAR_ID,
                        (unsigned long long) pci_resource_len(pdev,
                                                        SRAM_CFG_BAR_ID),
                        CFG_BAR_SIZE);
                return -ENODEV;
        }

        if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
                dev_err(hdev->dev,
                        "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
                        MSIX_BAR_ID,
                        (unsigned long long) pci_resource_len(pdev,
                                                                MSIX_BAR_ID),
                        MSIX_BAR_SIZE);
                return -ENODEV;
        }

        prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);

        /* set DMA mask for GOYA */
        rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
        if (rc) {
                dev_warn(hdev->dev, "Unable to set pci dma mask to 39 bits\n");
                rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
                if (rc) {
                        dev_err(hdev->dev,
                                "Unable to set pci dma mask to 32 bits\n");
                        return rc;
                }
        }

        rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
        if (rc) {
                dev_warn(hdev->dev,
                        "Unable to set pci consistent dma mask to 39 bits\n");
                rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
                if (rc) {
                        dev_err(hdev->dev,
                                "Unable to set pci consistent dma mask to 32 bits\n");
                        return rc;
                }
        }

        if (hdev->reset_pcilink)
                goya_reset_link_through_bridge(hdev);

        rc = pci_enable_device_mem(pdev);
        if (rc) {
                dev_err(hdev->dev, "can't enable PCI device\n");
                return rc;
        }

        pci_set_master(pdev);

        rc = goya_init_iatu(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to initialize iATU\n");
                goto disable_device;
        }

        rc = goya_pci_bars_map(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to initialize PCI BARS\n");
                goto disable_device;
        }

        if (!hdev->pldm) {
                val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
                if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
                        dev_warn(hdev->dev,
                                "PCI strap is not configured correctly, PCI bus errors may occur\n");
        }

        return 0;

disable_device:
        pci_clear_master(pdev);
        pci_disable_device(pdev);

        return rc;
}

/*
 * goya_early_fini - GOYA early finalization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Unmap PCI bars
 *
 */
static int goya_early_fini(struct hl_device *hdev)
{
        goya_pci_bars_unmap(hdev);

        pci_clear_master(hdev->pdev);
        pci_disable_device(hdev->pdev);

        return 0;
}

/*
 * goya_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_fetch_psoc_frequency(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;

        prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
        prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
        prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
        prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
}

/*
 * goya_late_init - GOYA late initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Get ArmCP info and send message to CPU to enable PCI access
 */
static int goya_late_init(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct goya_device *goya = hdev->asic_specific;
        int rc;

        rc = goya->armcp_info_get(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to get armcp info\n");
                return rc;
        }

        /* Now that we have the DRAM size in ASIC prop, we need to check
         * its size and configure the DMA_IF DDR wrap protection (which is in
         * the MMU block) accordingly. The value is the log2 of the DRAM size
         */
        WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));

        rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
        if (rc) {
                dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
                return rc;
        }

        WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
                        GOYA_ASYNC_EVENT_ID_INTS_REGISTER);

        goya_fetch_psoc_frequency(hdev);

        rc = goya_mmu_clear_pgt_range(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
                goto disable_pci_access;
        }

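        /*
         * Map the MMU DRAM default page, so accesses to DRAM addresses
         * that were not explicitly mapped are routed to it.
         */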
        rc = goya_mmu_set_dram_default_page(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to set DRAM default page\n");
                goto disable_pci_access;
        }

        return 0;

disable_pci_access:
        goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);

        return rc;
}

/*
 * goya_late_fini - GOYA late tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 * Free sensors allocated structures
 */
void goya_late_fini(struct hl_device *hdev)
{
        const struct hwmon_channel_info **channel_info_arr;
        int i = 0;

        if (!hdev->hl_chip_info->info)
                return;

        channel_info_arr = hdev->hl_chip_info->info;

        while (channel_info_arr[i]) {
                kfree(channel_info_arr[i]->config);
                kfree(channel_info_arr[i]);
                i++;
        }

        kfree(channel_info_arr);

        hdev->hl_chip_info->info = NULL;
}

/*
 * goya_sw_init - Goya software initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_init(struct hl_device *hdev)
{
        struct goya_device *goya;
        int rc;

        /* Allocate device structure */
        goya = kzalloc(sizeof(*goya), GFP_KERNEL);
        if (!goya)
                return -ENOMEM;

        goya->test_cpu_queue = goya_test_cpu_queue;
        goya->armcp_info_get = goya_armcp_info_get;

        /* according to goya_init_iatu */
        goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;

        goya->mme_clk = GOYA_PLL_FREQ_LOW;
        goya->tpc_clk = GOYA_PLL_FREQ_LOW;
        goya->ic_clk = GOYA_PLL_FREQ_LOW;

        hdev->asic_specific = goya;

        /* Create DMA pool for small allocations */
        hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
                        &hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
        if (!hdev->dma_pool) {
                dev_err(hdev->dev, "failed to create DMA pool\n");
                rc = -ENOMEM;
                goto free_goya_device;
        }

        hdev->cpu_accessible_dma_mem =
                        hdev->asic_funcs->dma_alloc_coherent(hdev,
                                        CPU_ACCESSIBLE_MEM_SIZE,
                                        &hdev->cpu_accessible_dma_address,
                                        GFP_KERNEL | __GFP_ZERO);

        if (!hdev->cpu_accessible_dma_mem) {
                dev_err(hdev->dev,
                        "failed to allocate %d bytes of DMA memory for CPU accessible memory space\n",
                        CPU_ACCESSIBLE_MEM_SIZE);
                rc = -ENOMEM;
                goto free_dma_pool;
        }

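        /*
         * Hand out the CPU-accessible region through a gen_pool allocator;
         * CPU_PKT_SHIFT sets the minimum allocation order.
         */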
        hdev->cpu_accessible_dma_pool = gen_pool_create(CPU_PKT_SHIFT, -1);
        if (!hdev->cpu_accessible_dma_pool) {
                dev_err(hdev->dev,
                        "Failed to create CPU accessible DMA pool\n");
                rc = -ENOMEM;
                goto free_cpu_pq_dma_mem;
        }

        rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
                                (uintptr_t) hdev->cpu_accessible_dma_mem,
                                CPU_ACCESSIBLE_MEM_SIZE, -1);
        if (rc) {
                dev_err(hdev->dev,
                        "Failed to add memory to CPU accessible DMA pool\n");
                rc = -EFAULT;
                goto free_cpu_pq_pool;
        }

        spin_lock_init(&goya->hw_queues_lock);

        return 0;

free_cpu_pq_pool:
        gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_pq_dma_mem:
        hdev->asic_funcs->dma_free_coherent(hdev, CPU_ACCESSIBLE_MEM_SIZE,
                        hdev->cpu_accessible_dma_mem,
                        hdev->cpu_accessible_dma_address);
free_dma_pool:
        dma_pool_destroy(hdev->dma_pool);
free_goya_device:
        kfree(goya);

        return rc;
}

/*
 * goya_sw_fini - Goya software tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_fini(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;

        gen_pool_destroy(hdev->cpu_accessible_dma_pool);

        hdev->asic_funcs->dma_free_coherent(hdev, CPU_ACCESSIBLE_MEM_SIZE,
                        hdev->cpu_accessible_dma_mem,
                        hdev->cpu_accessible_dma_address);

        dma_pool_destroy(hdev->dma_pool);

        kfree(goya);

        return 0;
}

static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
                dma_addr_t bus_address)
{
        struct goya_device *goya = hdev->asic_specific;
        u32 mtr_base_lo, mtr_base_hi;
        u32 so_base_lo, so_base_hi;
        u32 gic_base_lo, gic_base_hi;
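        /* All DMA QMAN register blocks share one layout; reg_off selects the instance */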
        u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);

        mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
        mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
        so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
        so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

        gic_base_lo =
                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
        gic_base_hi =
                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

        WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
        WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));

        WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
        WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
        WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);

        WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
        WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
        WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
        WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
        WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
        WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
        WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
                        GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);

        /* PQ has buffer of 2 cache lines, while CQ has 8 lines */
        WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
        WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);

        if (goya->hw_cap_initialized & HW_CAP_MMU)
                WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
        else
                WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);

        WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, QMAN_DMA_ERR_MSG_EN);
        WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
}

static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
{
        u32 gic_base_lo, gic_base_hi;
        u64 sob_addr;
        u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1);

        gic_base_lo =
                lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
        gic_base_hi =
                upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

        WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo);
        WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi);
        WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off,
                        GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id);

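        /*
         * DMA channels 1-4 report write completion to a dedicated sync
         * object. Channel 0 is used only by the driver, so it skips this.
         */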
        if (dma_id) {
                sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
                                (dma_id - 1) * 4;
                WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + reg_off,
                                lower_32_bits(sob_addr));
                WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off,
                                upper_32_bits(sob_addr));
                WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
        }
}

/*
 * goya_init_dma_qmans - Initialize QMAN DMA registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the QMAN DMA channels
 *
 */
static void goya_init_dma_qmans(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        struct hl_hw_queue *q;
        dma_addr_t bus_address;
        int i;

        if (goya->hw_cap_initialized & HW_CAP_DMA)
                return;

        q = &hdev->kernel_queues[0];

        for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
                bus_address = q->bus_address +
                                hdev->asic_prop.host_phys_base_address;

                goya_init_dma_qman(hdev, i, bus_address);
                goya_init_dma_ch(hdev, i);
        }

        goya->hw_cap_initialized |= HW_CAP_DMA;
}

/*
 * goya_disable_external_queues - Disable external queues
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_disable_external_queues(struct hl_device *hdev)
{
        WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
        WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
}

static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
                                u32 cp_sts_reg, u32 glbl_sts0_reg)
{
        int rc;
        u32 status;

        /* use the values of TPC0 as they are all the same */

        WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);

        status = RREG32(cp_sts_reg);
        if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
                rc = hl_poll_timeout(
                        hdev,
                        cp_sts_reg,
                        status,
                        !(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
                        1000,
                        QMAN_FENCE_TIMEOUT_USEC);

                /* if QMAN is stuck in fence no need to check for stop */
                if (rc)
                        return 0;
        }

        rc = hl_poll_timeout(
                hdev,
                glbl_sts0_reg,
                status,
                (status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
                1000,
                QMAN_STOP_TIMEOUT_USEC);

        if (rc) {
                dev_err(hdev->dev,
                        "Timeout while waiting for QMAN to stop\n");
                return -EINVAL;
        }

        return 0;
}

/*
 * goya_stop_external_queues - Stop external queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_stop_external_queues(struct hl_device *hdev)
{
        int rc, retval = 0;

        rc = goya_stop_queue(hdev,
                        mmDMA_QM_0_GLBL_CFG1,
                        mmDMA_QM_0_CP_STS,
                        mmDMA_QM_0_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop DMA QMAN 0\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmDMA_QM_1_GLBL_CFG1,
                        mmDMA_QM_1_CP_STS,
                        mmDMA_QM_1_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop DMA QMAN 1\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmDMA_QM_2_GLBL_CFG1,
                        mmDMA_QM_2_CP_STS,
                        mmDMA_QM_2_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop DMA QMAN 2\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmDMA_QM_3_GLBL_CFG1,
                        mmDMA_QM_3_CP_STS,
                        mmDMA_QM_3_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop DMA QMAN 3\n");
                retval = -EIO;
        }

        rc = goya_stop_queue(hdev,
                        mmDMA_QM_4_GLBL_CFG1,
                        mmDMA_QM_4_CP_STS,
                        mmDMA_QM_4_GLBL_STS0);

        if (rc) {
                dev_err(hdev->dev, "failed to stop DMA QMAN 4\n");
                retval = -EIO;
        }

        return retval;
}

static void goya_resume_external_queues(struct hl_device *hdev)
{
        WREG32(mmDMA_QM_0_GLBL_CFG1, 0);
        WREG32(mmDMA_QM_1_GLBL_CFG1, 0);
        WREG32(mmDMA_QM_2_GLBL_CFG1, 0);
        WREG32(mmDMA_QM_3_GLBL_CFG1, 0);
        WREG32(mmDMA_QM_4_GLBL_CFG1, 0);
}

/*
 * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_init_cpu_queues(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        struct hl_eq *eq;
        dma_addr_t bus_address;
        u32 status;
        struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
        int err;

        if (!hdev->cpu_queues_enable)
                return 0;

        if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
                return 0;

        eq = &hdev->event_queue;

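        /*
         * The PSOC scratchpad registers act as a mailbox for passing the
         * PQ/EQ addresses and sizes to the embedded ARM CPU.
         */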
        bus_address = cpu_pq->bus_address +
                        hdev->asic_prop.host_phys_base_address;
        WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_0, lower_32_bits(bus_address));
        WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_1, upper_32_bits(bus_address));

        bus_address = eq->bus_address + hdev->asic_prop.host_phys_base_address;
        WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_2, lower_32_bits(bus_address));
        WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_3, upper_32_bits(bus_address));

        bus_address = hdev->cpu_accessible_dma_address +
                        hdev->asic_prop.host_phys_base_address;
        WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_8, lower_32_bits(bus_address));
        WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_9, upper_32_bits(bus_address));

        WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_5, HL_QUEUE_SIZE_IN_BYTES);
        WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_4, HL_EQ_SIZE_IN_BYTES);
        WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_10, CPU_ACCESSIBLE_MEM_SIZE);

        /* Used for EQ CI */
        WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, 0);

        WREG32(mmCPU_IF_PF_PQ_PI, 0);

        WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_7, PQ_INIT_STATUS_READY_FOR_CP);

        WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
                        GOYA_ASYNC_EVENT_ID_PI_UPDATE);

        err = hl_poll_timeout(
                hdev,
                mmPSOC_GLOBAL_CONF_SCRATCHPAD_7,
                status,
                (status == PQ_INIT_STATUS_READY_FOR_HOST),
                1000,
                GOYA_CPU_TIMEOUT_USEC);

        if (err) {
                dev_err(hdev->dev,
                        "Failed to communicate with ARM CPU (ArmCP timeout)\n");
                return -EIO;
        }

        goya->hw_cap_initialized |= HW_CAP_CPU_Q;
        return 0;
}

static void goya_set_pll_refclk(struct hl_device *hdev)
{
        WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
        WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
        WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
        WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);

        WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
        WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
        WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
        WREG32(mmIC_PLL_DIV_SEL_3, 0x0);

        WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
        WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
        WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
        WREG32(mmMC_PLL_DIV_SEL_3, 0x0);

        WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
        WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
        WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
        WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);

        WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
        WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
        WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
        WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);

        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
        WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);

        WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
        WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
        WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
        WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
}

static void goya_disable_clk_rlx(struct hl_device *hdev)
{
        WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
        WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
}

static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
{
        u64 tpc_eml_address;
        u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
        int err, slm_index;

        tpc_offset = tpc_id * 0x40000;
        tpc_eml_offset = tpc_id * 0x200000;
        tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
        tpc_slm_offset = tpc_eml_address + 0x100000;

        /*
         * Workaround for Bug H2 #2443:
         * "TPC SB is not initialized on chip reset"
         */

        val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
        if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
                dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
                        tpc_id);

        WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);

        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
        WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);

        WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
                1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);

        err = hl_poll_timeout(
                hdev,
                mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
                val,
                (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
                1000,
                HL_DEVICE_TIMEOUT_USEC);

        if (err)
                dev_err(hdev->dev,
                        "Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);

        WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
                1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);

        msleep(GOYA_RESET_WAIT_MSEC);

        WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
                ~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));

        msleep(GOYA_RESET_WAIT_MSEC);

        for (slm_index = 0 ; slm_index < 256 ; slm_index++)
                WREG32(tpc_slm_offset + (slm_index << 2), 0);

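        /* read back once to make sure the SLM writes above have gone through */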
        val = RREG32(tpc_slm_offset);
}

static void goya_tpc_mbist_workaround(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        int i;

        if (hdev->pldm)
                return;

        if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST)
                return;

        /* Workaround for H2 #2443 */

        for (i = 0 ; i < TPC_MAX_NUM ; i++)
                _goya_tpc_mbist_workaround(hdev, i);

        goya->hw_cap_initialized |= HW_CAP_TPC_MBIST;
}

/*
 * goya_init_golden_registers - Initialize golden registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the device
 *
 */
static void goya_init_golden_registers(struct hl_device *hdev)
{
        struct goya_device *goya = hdev->asic_specific;
        u32 polynom[10], tpc_intr_mask, offset;
        int i;

        if (goya->hw_cap_initialized & HW_CAP_GOLDEN)
                return;

        polynom[0] = 0x00020080;
        polynom[1] = 0x00401000;
        polynom[2] = 0x00200800;
        polynom[3] = 0x00002000;
        polynom[4] = 0x00080200;
        polynom[5] = 0x00040100;
        polynom[6] = 0x00100400;
        polynom[7] = 0x00004000;
        polynom[8] = 0x00010000;
        polynom[9] = 0x00008000;

        /* Mask all arithmetic interrupts from TPC */
        tpc_intr_mask = 0x7FFF;

        for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) {
                WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);

                WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204);


                WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207);

                WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206);

                WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105);

                WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105);
                WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104);
                WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103);
                WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102);
                WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101);
        }

        WREG32(mmMME_STORE_MAX_CREDIT, 0x21);
        WREG32(mmMME_AGU, 0x0f0f0f10);
        WREG32(mmMME_SEI_MASK, ~0x0);

        WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
        WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
        WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
        WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
        WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
        WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701);
        WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401);
        WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401);
        WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301);
        WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
        WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
        WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105);
        WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
        WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
        WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301);
        WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401);
        WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101);
        WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101);
        WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202);
        WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101);
        WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201);
        WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701);
        WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101);
1508         WREG32(mmMME1_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
1509         WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1510         WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1511         WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701);
1512         WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201);
1513         WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1514         WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102);
1515         WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
1516         WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
1517         WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707);
1518         WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201);
1519         WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
1520         WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
1521         WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102);
1522         WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102);
1523         WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102);
1524         WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102);
1525         WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102);
1526         WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107);
1527         WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106);
1528         WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102);
1529         WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102);
1530         WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102);
1531         WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102);
1532         WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102);
1533         WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702);
1534         WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702);
1535         WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602);
1536         WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402);
1537         WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202);
1538         WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102);
1539         WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1540         WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1541         WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1542         WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1543         WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1544         WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1545         WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
1546         WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
1547         WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
1548         WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
1549         WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1550         WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107);
1551         WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107);
1552         WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1553         WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
1554         WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
1555         WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
1556         WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
1557         WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501);
1558         WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501);
1559         WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301);
1560         WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401);
1561         WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101);
1562         WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101);
1563         WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1564         WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1565         WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1566         WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1567         WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1568         WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1569
1570         WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1571         WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
1572         WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101);
1573         WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102);
1574         WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1575         WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202);
1576         WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201);
1577         WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201);
1578         WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202);
1579         WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1580         WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
1581         WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101);
1582
1583         WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
1584         WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101);
1585         WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201);
1586         WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102);
1587         WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101);
1588         WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202);
1589         WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201);
1590         WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201);
1591         WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202);
1592         WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1593         WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
1594         WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101);
1595
1596         WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
1597         WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
1598         WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301);
1599         WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102);
1600         WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101);
1601         WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301);
1602         WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201);
1603         WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201);
1604         WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402);
1605         WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
1606         WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
1607         WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401);
1608
1609         WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
1610         WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
1611         WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401);
1612         WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
1613         WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101);
1614         WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702);
1615         WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201);
1616         WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201);
1617         WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602);
1618         WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
1619         WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
1620         WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301);
1621
1622         WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101);
1623         WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101);
1624         WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501);
1625         WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
1626         WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101);
1627         WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602);
1628         WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201);
1629         WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201);
1630         WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702);
1631         WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
1632         WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1633         WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1634
1635         WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1636         WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
1637         WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601);
1638         WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
1639         WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1640         WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702);
1641         WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101);
1642         WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101);
1643         WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702);
1644         WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
1645         WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1646         WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1647
1648         for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) {
1649                 WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1650                 WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1651                 WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1652                 WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1653                 WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1654                 WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1655
1656                 WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1657                 WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1658                 WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1659                 WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1660                 WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1661                 WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1662                 WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1663                 WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1664
1665                 WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1666                 WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1667         }
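
        /*
         * Editor's note: each of the ten coefficients is written right-shifted
         * by 7 into ten consecutive SPLIT_COEF registers (4-byte stride), so
         * e.g. polynom[2] = 0x00200800 is programmed as 0x00200800 >> 7 ==
         * 0x4010.
         */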
1668
1669         for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) {
1670                 WREG32(mmMME1_RTR_SCRAMB_EN + offset,
1671                                 1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT);
1672                 WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset,
1673                                 1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT);
1674         }
1675
1676         for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) {
1677                 /*
1678                  * Workaround for Bug H2 #2441:
1679                  * "ST.NOP set trace event illegal opcode"
1680                  */
1681                 WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask);
1682
1683                 WREG32(mmTPC0_NRTR_SCRAMB_EN + offset,
1684                                 1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
1685                 WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
1686                                 1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1687         }
1688
1689         WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
1690         WREG32(mmDMA_NRTR_NON_LIN_SCRAMB,
1691                         1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1692
1693         WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT);
1694         WREG32(mmPCI_NRTR_NON_LIN_SCRAMB,
1695                         1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1696
1697         /*
1698          * Workaround for H2 #HW-23 bug:
1699          * Set the DMA max outstanding read requests to 240 on DMA CH 1
1700          * and to 16 on the KMD DMA (CH 0).
1701          * We need to limit only these DMAs because the user can read from
1702          * the Host only through DMA CH 1
1703          */
1704         WREG32(mmDMA_CH_0_CFG0, 0x0fff0010);
1705         WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);
1706
1707         goya->hw_cap_initialized |= HW_CAP_GOLDEN;
1708 }
1709
1710 static void goya_init_mme_qman(struct hl_device *hdev)
1711 {
1712         u32 mtr_base_lo, mtr_base_hi;
1713         u32 so_base_lo, so_base_hi;
1714         u32 gic_base_lo, gic_base_hi;
1715         u64 qman_base_addr;
1716
1717         mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1718         mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1719         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1720         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1721
1722         gic_base_lo =
1723                 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1724         gic_base_hi =
1725                 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1726
1727         qman_base_addr = hdev->asic_prop.sram_base_address +
1728                                 MME_QMAN_BASE_OFFSET;
1729
1730         WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
1731         WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
1732         WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
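        /*
         * (Editor's note: PQ_SIZE is programmed as log2 of the queue length,
         * e.g. ilog2(256) == 8, so MME_QMAN_LENGTH is presumably a power
         * of 2.)
         */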
1733         WREG32(mmMME_QM_PQ_PI, 0);
1734         WREG32(mmMME_QM_PQ_CI, 0);
1735         WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
1736         WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
1737         WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
1738         WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);
1739
1740         WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1741         WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1742         WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1743         WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1744
1745         /* QMAN CQ has 8 cache lines */
1746         WREG32(mmMME_QM_CQ_CFG1, 0x00080008);
1747
1748         WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
1749         WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);
1750
1751         WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);
1752
1753         WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);
1754
1755         WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);
1756
1757         WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
1758 }
1759
1760 static void goya_init_mme_cmdq(struct hl_device *hdev)
1761 {
1762         u32 mtr_base_lo, mtr_base_hi;
1763         u32 so_base_lo, so_base_hi;
1764         u32 gic_base_lo, gic_base_hi;
1766
1767         mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1768         mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1769         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1770         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1771
1772         gic_base_lo =
1773                 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1774         gic_base_hi =
1775                 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1776
1780         WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1781         WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1782         WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1783         WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1784
1785         /* CMDQ CQ has 20 cache lines */
1786         WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);
1787
1788         WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
1789         WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);
1790
1791         WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);
1792
1793         WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);
1794
1795         WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);
1796
1797         WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
1798 }
1799
1800 static void goya_init_mme_qmans(struct hl_device *hdev)
1801 {
1802         struct goya_device *goya = hdev->asic_specific;
1803         u32 so_base_lo, so_base_hi;
1804
1805         if (goya->hw_cap_initialized & HW_CAP_MME)
1806                 return;
1807
1808         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1809         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1810
1811         WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
1812         WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);
1813
1814         goya_init_mme_qman(hdev);
1815         goya_init_mme_cmdq(hdev);
1816
1817         goya->hw_cap_initialized |= HW_CAP_MME;
1818 }
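
/*
 * Editor's note: like the other per-block init routines in this file, the
 * function above is guarded by a hw_cap_initialized bit. It returns early
 * if the bit is already set and sets it on success, so repeated H/W init
 * calls program each block only once until the reset path clears the bit.
 */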
1819
1820 static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
1821 {
1822         u32 mtr_base_lo, mtr_base_hi;
1823         u32 so_base_lo, so_base_hi;
1824         u32 gic_base_lo, gic_base_hi;
1825         u64 qman_base_addr;
1826         u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);
1827
1828         mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1829         mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1830         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1831         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1832
1833         gic_base_lo =
1834                 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1835         gic_base_hi =
1836                 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1837
1838         qman_base_addr = hdev->asic_prop.sram_base_address + base_off;
1839
1840         WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
1841         WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
1842         WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
1843         WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
1844         WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
1845         WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
1846         WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
1847         WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
1848         WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);
1849
1850         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1851         WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1852         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1853         WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1854
1855         WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);
1856
1857         WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1858         WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1859
1860         WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
1861                         GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);
1862
1863         WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);
1864
1865         WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);
1866
1867         WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
1868 }
1869
1870 static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
1871 {
1872         u32 mtr_base_lo, mtr_base_hi;
1873         u32 so_base_lo, so_base_hi;
1874         u32 gic_base_lo, gic_base_hi;
1875         u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);
1876
1877         mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1878         mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1879         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1880         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1881
1882         gic_base_lo =
1883                 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1884         gic_base_hi =
1885                 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1886
1887         WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1888         WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1889         WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1890         WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1891
1892         WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);
1893
1894         WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1895         WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1896
1897         WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
1898                         GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);
1899
1900         WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);
1901
1902         WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);
1903
1904         WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
1905 }
1906
1907 static void goya_init_tpc_qmans(struct hl_device *hdev)
1908 {
1909         struct goya_device *goya = hdev->asic_specific;
1910         u32 so_base_lo, so_base_hi;
1911         u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW -
1912                         mmTPC0_CFG_SM_BASE_ADDRESS_LOW;
1913         int i;
1914
1915         if (goya->hw_cap_initialized & HW_CAP_TPC)
1916                 return;
1917
1918         so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1919         so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1920
1921         for (i = 0 ; i < TPC_MAX_NUM ; i++) {
1922                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off,
1923                                 so_base_lo);
1924                 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off,
1925                                 so_base_hi);
1926         }
1927
1928         goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0);
1929         goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1);
1930         goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2);
1931         goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3);
1932         goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4);
1933         goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5);
1934         goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6);
1935         goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7);
1936
1937         for (i = 0 ; i < TPC_MAX_NUM ; i++)
1938                 goya_init_tpc_cmdq(hdev, i);
1939
1940         goya->hw_cap_initialized |= HW_CAP_TPC;
1941 }
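
/*
 * Editor's note: goya_init_tpc_qman() relocates every mmTPC0_* write by
 * reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI), i.e. the per-TPC
 * register-block stride is derived from the register map itself rather than
 * hard-coded. goya_init_tpc_cmdq() uses the same trick with the CMDQ
 * registers.
 */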
1942
1943 /*
1944  * goya_disable_internal_queues - Disable internal queues
1945  *
1946  * @hdev: pointer to hl_device structure
1947  *
1948  */
1949 static void goya_disable_internal_queues(struct hl_device *hdev)
1950 {
1951         WREG32(mmMME_QM_GLBL_CFG0, 0);
1952         WREG32(mmMME_CMDQ_GLBL_CFG0, 0);
1953
1954         WREG32(mmTPC0_QM_GLBL_CFG0, 0);
1955         WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);
1956
1957         WREG32(mmTPC1_QM_GLBL_CFG0, 0);
1958         WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0);
1959
1960         WREG32(mmTPC2_QM_GLBL_CFG0, 0);
1961         WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0);
1962
1963         WREG32(mmTPC3_QM_GLBL_CFG0, 0);
1964         WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0);
1965
1966         WREG32(mmTPC4_QM_GLBL_CFG0, 0);
1967         WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0);
1968
1969         WREG32(mmTPC5_QM_GLBL_CFG0, 0);
1970         WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0);
1971
1972         WREG32(mmTPC6_QM_GLBL_CFG0, 0);
1973         WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0);
1974
1975         WREG32(mmTPC7_QM_GLBL_CFG0, 0);
1976         WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0);
1977 }
1978
1979 /*
1980  * goya_stop_internal_queues - Stop internal queues
1981  *
1982  * @hdev: pointer to hl_device structure
1983  *
1984  * Returns 0 on success
1985  *
1986  */
1987 static int goya_stop_internal_queues(struct hl_device *hdev)
1988 {
1989         int rc, retval = 0;
1990
1991         /*
1992          * Each queue (QMAN) is a separate H/W logic, so each QMAN can be
1993          * stopped independently and failure to stop one does NOT prevent
1994          * us from trying to stop the others
1995          */
1996
1997         rc = goya_stop_queue(hdev,
1998                         mmMME_QM_GLBL_CFG1,
1999                         mmMME_QM_CP_STS,
2000                         mmMME_QM_GLBL_STS0);
2001
2002         if (rc) {
2003                 dev_err(hdev->dev, "failed to stop MME QMAN\n");
2004                 retval = -EIO;
2005         }
2006
2007         rc = goya_stop_queue(hdev,
2008                         mmMME_CMDQ_GLBL_CFG1,
2009                         mmMME_CMDQ_CP_STS,
2010                         mmMME_CMDQ_GLBL_STS0);
2011
2012         if (rc) {
2013                 dev_err(hdev->dev, "failed to stop MME CMDQ\n");
2014                 retval = -EIO;
2015         }
2016
2017         rc = goya_stop_queue(hdev,
2018                         mmTPC0_QM_GLBL_CFG1,
2019                         mmTPC0_QM_CP_STS,
2020                         mmTPC0_QM_GLBL_STS0);
2021
2022         if (rc) {
2023                 dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n");
2024                 retval = -EIO;
2025         }
2026
2027         rc = goya_stop_queue(hdev,
2028                         mmTPC0_CMDQ_GLBL_CFG1,
2029                         mmTPC0_CMDQ_CP_STS,
2030                         mmTPC0_CMDQ_GLBL_STS0);
2031
2032         if (rc) {
2033                 dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n");
2034                 retval = -EIO;
2035         }
2036
2037         rc = goya_stop_queue(hdev,
2038                         mmTPC1_QM_GLBL_CFG1,
2039                         mmTPC1_QM_CP_STS,
2040                         mmTPC1_QM_GLBL_STS0);
2041
2042         if (rc) {
2043                 dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n");
2044                 retval = -EIO;
2045         }
2046
2047         rc = goya_stop_queue(hdev,
2048                         mmTPC1_CMDQ_GLBL_CFG1,
2049                         mmTPC1_CMDQ_CP_STS,
2050                         mmTPC1_CMDQ_GLBL_STS0);
2051
2052         if (rc) {
2053                 dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n");
2054                 retval = -EIO;
2055         }
2056
2057         rc = goya_stop_queue(hdev,
2058                         mmTPC2_QM_GLBL_CFG1,
2059                         mmTPC2_QM_CP_STS,
2060                         mmTPC2_QM_GLBL_STS0);
2061
2062         if (rc) {
2063                 dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n");
2064                 retval = -EIO;
2065         }
2066
2067         rc = goya_stop_queue(hdev,
2068                         mmTPC2_CMDQ_GLBL_CFG1,
2069                         mmTPC2_CMDQ_CP_STS,
2070                         mmTPC2_CMDQ_GLBL_STS0);
2071
2072         if (rc) {
2073                 dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n");
2074                 retval = -EIO;
2075         }
2076
2077         rc = goya_stop_queue(hdev,
2078                         mmTPC3_QM_GLBL_CFG1,
2079                         mmTPC3_QM_CP_STS,
2080                         mmTPC3_QM_GLBL_STS0);
2081
2082         if (rc) {
2083                 dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n");
2084                 retval = -EIO;
2085         }
2086
2087         rc = goya_stop_queue(hdev,
2088                         mmTPC3_CMDQ_GLBL_CFG1,
2089                         mmTPC3_CMDQ_CP_STS,
2090                         mmTPC3_CMDQ_GLBL_STS0);
2091
2092         if (rc) {
2093                 dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n");
2094                 retval = -EIO;
2095         }
2096
2097         rc = goya_stop_queue(hdev,
2098                         mmTPC4_QM_GLBL_CFG1,
2099                         mmTPC4_QM_CP_STS,
2100                         mmTPC4_QM_GLBL_STS0);
2101
2102         if (rc) {
2103                 dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n");
2104                 retval = -EIO;
2105         }
2106
2107         rc = goya_stop_queue(hdev,
2108                         mmTPC4_CMDQ_GLBL_CFG1,
2109                         mmTPC4_CMDQ_CP_STS,
2110                         mmTPC4_CMDQ_GLBL_STS0);
2111
2112         if (rc) {
2113                 dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n");
2114                 retval = -EIO;
2115         }
2116
2117         rc = goya_stop_queue(hdev,
2118                         mmTPC5_QM_GLBL_CFG1,
2119                         mmTPC5_QM_CP_STS,
2120                         mmTPC5_QM_GLBL_STS0);
2121
2122         if (rc) {
2123                 dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n");
2124                 retval = -EIO;
2125         }
2126
2127         rc = goya_stop_queue(hdev,
2128                         mmTPC5_CMDQ_GLBL_CFG1,
2129                         mmTPC5_CMDQ_CP_STS,
2130                         mmTPC5_CMDQ_GLBL_STS0);
2131
2132         if (rc) {
2133                 dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n");
2134                 retval = -EIO;
2135         }
2136
2137         rc = goya_stop_queue(hdev,
2138                         mmTPC6_QM_GLBL_CFG1,
2139                         mmTPC6_QM_CP_STS,
2140                         mmTPC6_QM_GLBL_STS0);
2141
2142         if (rc) {
2143                 dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n");
2144                 retval = -EIO;
2145         }
2146
2147         rc = goya_stop_queue(hdev,
2148                         mmTPC6_CMDQ_GLBL_CFG1,
2149                         mmTPC6_CMDQ_CP_STS,
2150                         mmTPC6_CMDQ_GLBL_STS0);
2151
2152         if (rc) {
2153                 dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n");
2154                 retval = -EIO;
2155         }
2156
2157         rc = goya_stop_queue(hdev,
2158                         mmTPC7_QM_GLBL_CFG1,
2159                         mmTPC7_QM_CP_STS,
2160                         mmTPC7_QM_GLBL_STS0);
2161
2162         if (rc) {
2163                 dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n");
2164                 retval = -EIO;
2165         }
2166
2167         rc = goya_stop_queue(hdev,
2168                         mmTPC7_CMDQ_GLBL_CFG1,
2169                         mmTPC7_CMDQ_CP_STS,
2170                         mmTPC7_CMDQ_GLBL_STS0);
2171
2172         if (rc) {
2173                 dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n");
2174                 retval = -EIO;
2175         }
2176
2177         return retval;
2178 }
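
/*
 * Editor's sketch (not driver code, hence compiled out): the sixteen
 * goya_stop_queue() calls above differ only in the register triplet and the
 * name printed on failure, so they could be table-driven. The register names
 * are the real ones used above; the structure and helper below are
 * hypothetical.
 */
#if 0
struct goya_stop_queue_regs {
        u32 cfg1;
        u32 cp_sts;
        u32 glbl_sts0;
        const char *name;
};

static const struct goya_stop_queue_regs goya_internal_queue_regs[] = {
        { mmMME_QM_GLBL_CFG1, mmMME_QM_CP_STS, mmMME_QM_GLBL_STS0,
                        "MME QMAN" },
        { mmMME_CMDQ_GLBL_CFG1, mmMME_CMDQ_CP_STS, mmMME_CMDQ_GLBL_STS0,
                        "MME CMDQ" },
        { mmTPC0_QM_GLBL_CFG1, mmTPC0_QM_CP_STS, mmTPC0_QM_GLBL_STS0,
                        "TPC 0 QMAN" },
        { mmTPC0_CMDQ_GLBL_CFG1, mmTPC0_CMDQ_CP_STS, mmTPC0_CMDQ_GLBL_STS0,
                        "TPC 0 CMDQ" },
        /* ... remaining TPC 1-7 QMAN/CMDQ triplets omitted for brevity ... */
};

static int goya_stop_internal_queues_sketch(struct hl_device *hdev)
{
        int i, rc, retval = 0;

        /* Failure to stop one queue does not prevent stopping the others */
        for (i = 0 ; i < ARRAY_SIZE(goya_internal_queue_regs) ; i++) {
                rc = goya_stop_queue(hdev,
                                goya_internal_queue_regs[i].cfg1,
                                goya_internal_queue_regs[i].cp_sts,
                                goya_internal_queue_regs[i].glbl_sts0);
                if (rc) {
                        dev_err(hdev->dev, "failed to stop %s\n",
                                goya_internal_queue_regs[i].name);
                        retval = -EIO;
                }
        }

        return retval;
}
#endif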
2179
2180 static void goya_resume_internal_queues(struct hl_device *hdev)
2181 {
2182         WREG32(mmMME_QM_GLBL_CFG1, 0);
2183         WREG32(mmMME_CMDQ_GLBL_CFG1, 0);
2184
2185         WREG32(mmTPC0_QM_GLBL_CFG1, 0);
2186         WREG32(mmTPC0_CMDQ_GLBL_CFG1, 0);
2187
2188         WREG32(mmTPC1_QM_GLBL_CFG1, 0);
2189         WREG32(mmTPC1_CMDQ_GLBL_CFG1, 0);
2190
2191         WREG32(mmTPC2_QM_GLBL_CFG1, 0);
2192         WREG32(mmTPC2_CMDQ_GLBL_CFG1, 0);
2193
2194         WREG32(mmTPC3_QM_GLBL_CFG1, 0);
2195         WREG32(mmTPC3_CMDQ_GLBL_CFG1, 0);
2196
2197         WREG32(mmTPC4_QM_GLBL_CFG1, 0);
2198         WREG32(mmTPC4_CMDQ_GLBL_CFG1, 0);
2199
2200         WREG32(mmTPC5_QM_GLBL_CFG1, 0);
2201         WREG32(mmTPC5_CMDQ_GLBL_CFG1, 0);
2202
2203         WREG32(mmTPC6_QM_GLBL_CFG1, 0);
2204         WREG32(mmTPC6_CMDQ_GLBL_CFG1, 0);
2205
2206         WREG32(mmTPC7_QM_GLBL_CFG1, 0);
2207         WREG32(mmTPC7_CMDQ_GLBL_CFG1, 0);
2208 }
2209
2210 static void goya_dma_stall(struct hl_device *hdev)
2211 {
2212         WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
2213         WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
2214         WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
2215         WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
2216         WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
2217 }
2218
2219 static void goya_tpc_stall(struct hl_device *hdev)
2220 {
2221         WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2222         WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
2223         WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
2224         WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
2225         WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
2226         WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
2227         WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
2228         WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
2229 }
2230
2231 static void goya_mme_stall(struct hl_device *hdev)
2232 {
2233         WREG32(mmMME_STALL, 0xFFFFFFFF);
2234 }
2235
2236 static int goya_enable_msix(struct hl_device *hdev)
2237 {
2238         struct goya_device *goya = hdev->asic_specific;
2239         int cq_cnt = hdev->asic_prop.completion_queues_count;
2240         int rc, i, irq_cnt_init, irq;
2241
2242         if (goya->hw_cap_initialized & HW_CAP_MSIX)
2243                 return 0;
2244
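        /*
         * Editor's note: min_vecs == max_vecs == GOYA_MSIX_ENTRIES below, so
         * pci_alloc_irq_vectors() either allocates exactly that many MSI-X
         * vectors and returns their count, or fails with a negative errno;
         * partial allocation cannot happen with these arguments.
         */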
2245         rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
2246                                 GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
2247         if (rc < 0) {
2248                 dev_err(hdev->dev,
2249                         "MSI-X: Failed to enable support -- %d/%d\n",
2250                         GOYA_MSIX_ENTRIES, rc);
2251                 return rc;
2252         }
2253
2254         for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2255                 irq = pci_irq_vector(hdev->pdev, i);
2256                 rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
2257                                 &hdev->completion_queue[i]);
2258                 if (rc) {
2259                         dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2260                         goto free_irqs;
2261                 }
2262         }
2263
2264         irq = pci_irq_vector(hdev->pdev, EVENT_QUEUE_MSIX_IDX);
2265
2266         rc = request_irq(irq, hl_irq_handler_eq, 0,
2267                         goya_irq_name[EVENT_QUEUE_MSIX_IDX],
2268                         &hdev->event_queue);
2269         if (rc) {
2270                 dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2271                 goto free_irqs;
2272         }
2273
2274         goya->hw_cap_initialized |= HW_CAP_MSIX;
2275         return 0;
2276
2277 free_irqs:
2278         for (i = 0 ; i < irq_cnt_init ; i++)
2279                 free_irq(pci_irq_vector(hdev->pdev, i),
2280                         &hdev->completion_queue[i]);
2281
2282         pci_free_irq_vectors(hdev->pdev);
2283         return rc;
2284 }
2285
2286 static void goya_sync_irqs(struct hl_device *hdev)
2287 {
2288         struct goya_device *goya = hdev->asic_specific;
2289         int i;
2290
2291         if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2292                 return;
2293
2294         /* Wait for all pending IRQs to be finished */
2295         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
2296                 synchronize_irq(pci_irq_vector(hdev->pdev, i));
2297
2298         synchronize_irq(pci_irq_vector(hdev->pdev, EVENT_QUEUE_MSIX_IDX));
2299 }
2300
2301 static void goya_disable_msix(struct hl_device *hdev)
2302 {
2303         struct goya_device *goya = hdev->asic_specific;
2304         int i, irq;
2305
2306         if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2307                 return;
2308
2309         goya_sync_irqs(hdev);
2310
2311         irq = pci_irq_vector(hdev->pdev, EVENT_QUEUE_MSIX_IDX);
2312         free_irq(irq, &hdev->event_queue);
2313
2314         for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
2315                 irq = pci_irq_vector(hdev->pdev, i);
2316                 free_irq(irq, &hdev->completion_queue[i]);
2317         }
2318
2319         pci_free_irq_vectors(hdev->pdev);
2320
2321         goya->hw_cap_initialized &= ~HW_CAP_MSIX;
2322 }
2323
2324 static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
2325 {
2326         u32 wait_timeout_ms, cpu_timeout_ms;
2327
2328         dev_info(hdev->dev,
2329                 "Halting compute engines and disabling interrupts\n");
2330
2331         if (hdev->pldm) {
2332                 wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2333                 cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2334         } else {
2335                 wait_timeout_ms = GOYA_RESET_WAIT_MSEC;
2336                 cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
2337         }
2338
2339         if (hard_reset) {
2340                 /*
2341                  * We don't know the state of the CPU, so make sure it is
2342                  * stopped by any means necessary
2343                  */
2344                 WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
2345                 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2346                         GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
2347                 msleep(cpu_timeout_ms);
2348         }
2349
2350         goya_stop_external_queues(hdev);
2351         goya_stop_internal_queues(hdev);
2352
2353         msleep(wait_timeout_ms);
2354
2355         goya_dma_stall(hdev);
2356         goya_tpc_stall(hdev);
2357         goya_mme_stall(hdev);
2358
2359         msleep(wait_timeout_ms);
2360
2361         goya_disable_external_queues(hdev);
2362         goya_disable_internal_queues(hdev);
2363
2364         if (hard_reset)
2365                 goya_disable_msix(hdev);
2366         else
2367                 goya_sync_irqs(hdev);
2368 }
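
/*
 * Editor's note on the teardown order above: the queues are stopped first so
 * that no new work is fetched, the DMA/TPC/MME engines are then stalled so
 * that in-flight work drains, and only afterwards are the queues disabled.
 * The msleep() calls between the stages give each one time to take effect.
 */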
2369
2370 /*
2371  * goya_push_fw_to_device - Push FW code to device
2372  *
2373  * @hdev: pointer to hl_device structure
 * @fw_name: name of the firmware image file to request
 * @dst: IO-mapped destination address in device memory
2374  *
2375  * Copy fw code from firmware file to device memory.
2376  * Returns 0 on success
2377  *
2378  */
2379 static int goya_push_fw_to_device(struct hl_device *hdev, const char *fw_name,
2380                                         void __iomem *dst)
2381 {
2382         const struct firmware *fw;
2383         const u64 *fw_data;
2384         size_t fw_size, i;
2385         int rc;
2386
2387         rc = request_firmware(&fw, fw_name, hdev->dev);
2388
2389         if (rc) {
2390                 dev_err(hdev->dev, "Failed to request %s\n", fw_name);
2391                 goto out;
2392         }
2393
2394         fw_size = fw->size;
2395         if ((fw_size % 4) != 0) {
2396                 dev_err(hdev->dev, "illegal %s firmware size %zu\n",
2397                         fw_name, fw_size);
2398                 rc = -EINVAL;
2399                 goto out;
2400         }
2401
2402         dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
2403
2404         fw_data = (const u64 *) fw->data;
2405
2406         if ((fw->size % 8) != 0)
2407                 fw_size -= 8;
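
        /*
         * Editor's note: fw->size was verified above to be a multiple of 4,
         * so the remainder here is either 0 or 4. E.g. for fw->size == 12,
         * fw_size becomes 4: the loop below copies bytes 0..7 with a single
         * writeq() and the trailing writel() copies bytes 8..11.
         */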
2408
2409         for (i = 0 ; i < fw_size ; i += 8, fw_data++, dst += 8) {
2410                 if (!(i & (0x80000 - 1))) {
2411                         dev_dbg(hdev->dev,
2412                                 "copied so far %zu out of %zu for %s firmware",
2413                                 i, fw_size, fw_name);
2414                         usleep_range(20, 100);
2415                 }
2416
2417                 writeq(*fw_data, dst);
2418         }
2419
2420         if ((fw->size % 8) != 0)
2421                 writel(*(const u32 *) fw_data, dst);
2422
2423 out:
2424         release_firmware(fw);
2425         return rc;
2426 }
2427
2428 static int goya_pldm_init_cpu(struct hl_device *hdev)
2429 {
2430         char fw_name[200];
2431         void __iomem *dst;
2432         u32 val, unit_rst_val;
2433         int rc;
2434
2435         /* Must initialize SRAM scrambler before pushing u-boot to SRAM */
2436         goya_init_golden_registers(hdev);
2437
2438         /* Put ARM cores into reset */
2439         WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL, CPU_RESET_ASSERT);
2440         val = RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);
2441
2442         /* Reset the CA53 MACRO */
2443         unit_rst_val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
2444         WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, CA53_RESET);
2445         val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
2446         WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, unit_rst_val);
2447         val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
2448
2449         snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-u-boot.bin");
2450         dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + UBOOT_FW_OFFSET;
2451         rc = goya_push_fw_to_device(hdev, fw_name, dst);
2452         if (rc)
2453                 return rc;
2454
2455         snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb");
2456         dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
2457         rc = goya_push_fw_to_device(hdev, fw_name, dst);
2458         if (rc)
2459                 return rc;
2460
2461         WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_FIT_RDY);
2462         WREG32(mmPSOC_GLOBAL_CONF_WARM_REBOOT, CPU_BOOT_STATUS_NA);
2463
2464         WREG32(mmCPU_CA53_CFG_RST_ADDR_LSB_0,
2465                 lower_32_bits(SRAM_BASE_ADDR + UBOOT_FW_OFFSET));
2466         WREG32(mmCPU_CA53_CFG_RST_ADDR_MSB_0,
2467                 upper_32_bits(SRAM_BASE_ADDR + UBOOT_FW_OFFSET));
2468
2469         /* Release ARM core 0 from reset */
2470         WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL,
2471                                         CPU_RESET_CORE0_DEASSERT);
2472         val = RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);
2473
2474         return 0;
2475 }
2476
2477 /*
2478  * The FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
2479  * The version string is located at that offset.
2480  */
2481 static void goya_read_device_fw_version(struct hl_device *hdev,
2482                                         enum goya_fw_component fwc)
2483 {
2484         const char *name;
2485         u32 ver_off;
2486         char *dest;
2487
2488         switch (fwc) {
2489         case FW_COMP_UBOOT:
2490                 ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_29);
2491                 dest = hdev->asic_prop.uboot_ver;
2492                 name = "U-Boot";
2493                 break;
2494         case FW_COMP_PREBOOT:
2495                 ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_28);
2496                 dest = hdev->asic_prop.preboot_ver;
2497                 name = "Preboot";
2498                 break;
2499         default:
2500                 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2501                 return;
2502         }
2503
2504         ver_off &= ~((u32)SRAM_BASE_ADDR);
2505
2506         if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2507                 memcpy_fromio(dest, hdev->pcie_bar[SRAM_CFG_BAR_ID] + ver_off,
2508                                                         VERSION_MAX_LEN);
2509         } else {
2510                 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2511                                                                 name, ver_off);
2512                 strcpy(dest, "unavailable");
2513         }
2514 }
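
/*
 * Editor's worked example for the masking above (addresses hypothetical): if
 * the u32-truncated SRAM_BASE_ADDR is 0xF0000000 and the scratchpad holds
 * 0xF0001000, ver_off becomes 0x1000, a plain SRAM offset that is then
 * bounds-checked against SRAM_SIZE - VERSION_MAX_LEN before memcpy_fromio().
 */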
2515
2516 static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
2517 {
2518         struct goya_device *goya = hdev->asic_specific;
2519         char fw_name[200];
2520         void __iomem *dst;
2521         u32 status;
2522         int rc;
2523
2524         if (!hdev->cpu_enable)
2525                 return 0;
2526
2527         if (goya->hw_cap_initialized & HW_CAP_CPU)
2528                 return 0;
2529
2530         /*
2531          * Before pushing u-boot/linux to the device, we need to set the
2532          * DDR BAR to the base address of the DRAM
2533          */
2534         rc = goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
2535         if (rc) {
2536                 dev_err(hdev->dev,
2537                         "failed to map DDR bar to DRAM base address\n");
2538                 return rc;
2539         }
2540
2541         if (hdev->pldm) {
2542                 rc = goya_pldm_init_cpu(hdev);
2543                 if (rc)
2544                         return rc;
2545
2546                 goto out;
2547         }
2548
2549         /* Make sure CPU boot-loader is running */
2550         rc = hl_poll_timeout(
2551                 hdev,
2552                 mmPSOC_GLOBAL_CONF_WARM_REBOOT,
2553                 status,
2554                 (status == CPU_BOOT_STATUS_DRAM_RDY) ||
2555                 (status == CPU_BOOT_STATUS_SRAM_AVAIL),
2556                 10000,
2557                 cpu_timeout);
2558
2559         if (rc) {
2560                 dev_err(hdev->dev, "Error in ARM u-boot!");
2561                 switch (status) {
2562                 case CPU_BOOT_STATUS_NA:
2563                         dev_err(hdev->dev,
2564                                 "ARM status %d - BTL did NOT run\n", status);
2565                         break;
2566                 case CPU_BOOT_STATUS_IN_WFE:
2567                         dev_err(hdev->dev,
2568                                 "ARM status %d - Inside WFE loop\n", status);
2569                         break;
2570                 case CPU_BOOT_STATUS_IN_BTL:
2571                         dev_err(hdev->dev,
2572                                 "ARM status %d - Stuck in BTL\n", status);
2573                         break;
2574                 case CPU_BOOT_STATUS_IN_PREBOOT:
2575                         dev_err(hdev->dev,
2576                                 "ARM status %d - Stuck in Preboot\n", status);
2577                         break;
2578                 case CPU_BOOT_STATUS_IN_SPL:
2579                         dev_err(hdev->dev,
2580                                 "ARM status %d - Stuck in SPL\n", status);
2581                         break;
2582                 case CPU_BOOT_STATUS_IN_UBOOT:
2583                         dev_err(hdev->dev,
2584                                 "ARM status %d - Stuck in u-boot\n", status);
2585                         break;
2586                 case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
2587                         dev_err(hdev->dev,
2588                                 "ARM status %d - DDR initialization failed\n",
2589                                 status);
2590                         break;
2591                 default:
2592                         dev_err(hdev->dev,
2593                                 "ARM status %d - Invalid status code\n",
2594                                 status);
2595                         break;
2596                 }
2597                 return -EIO;
2598         }
2599
2600         /* Read U-Boot version now in case we will later fail */
2601         goya_read_device_fw_version(hdev, FW_COMP_UBOOT);
2602         goya_read_device_fw_version(hdev, FW_COMP_PREBOOT);
2603
2604         if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
2605                 goto out;
2606
2607         if (!hdev->fw_loading) {
2608                 dev_info(hdev->dev, "Skip loading FW\n");
2609                 goto out;
2610         }
2611
2612         snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb");
2613         dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
2614         rc = goya_push_fw_to_device(hdev, fw_name, dst);
2615         if (rc)
2616                 return rc;
2617
2618         WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_FIT_RDY);
2619
2620         rc = hl_poll_timeout(
2621                 hdev,
2622                 mmPSOC_GLOBAL_CONF_WARM_REBOOT,
2623                 status,
2624                 (status == CPU_BOOT_STATUS_SRAM_AVAIL),
2625                 10000,
2626                 cpu_timeout);
2627
2628         if (rc) {
2629                 if (status == CPU_BOOT_STATUS_FIT_CORRUPTED)
2630                         dev_err(hdev->dev,
2631                                 "ARM u-boot reports FIT image is corrupted\n");
2632                 else
2633                         dev_err(hdev->dev,
2634                                 "ARM Linux failed to load, %d\n", status);
2635                 WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_NA);
2636                 return -EIO;
2637         }
2638
2639         dev_info(hdev->dev, "Successfully loaded firmware to device\n");
2640
2641 out:
2642         goya->hw_cap_initialized |= HW_CAP_CPU;
2643
2644         return 0;
2645 }
2646
2647 static int goya_mmu_init(struct hl_device *hdev)
2648 {
2649         struct asic_fixed_properties *prop = &hdev->asic_prop;
2650         struct goya_device *goya = hdev->asic_specific;
2651         u64 hop0_addr;
2652         int rc, i;
2653
2654         if (!hdev->mmu_enable)
2655                 return 0;
2656
2657         if (goya->hw_cap_initialized & HW_CAP_MMU)
2658                 return 0;
2659
2660         hdev->dram_supports_virtual_memory = true;
2661         hdev->dram_default_page_mapping = true;
2662
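        /*
         * Editor's worked example (hop table size hypothetical): with 4KB hop
         * tables, asid 0 gets its hop0 table at mmu_pgt_addr, asid 1 at
         * mmu_pgt_addr + 0x1000, and so on, i.e. one page-table root per
         * address space.
         */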
2663         for (i = 0 ; i < prop->max_asid ; i++) {
2664                 hop0_addr = prop->mmu_pgt_addr +
2665                                 (i * prop->mmu_hop_table_size);
2666
2667                 rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2668                 if (rc) {
2669                         dev_err(hdev->dev,
2670                                 "failed to set hop0 addr for asid %d\n", i);
2671                         goto err;
2672                 }
2673         }
2674
2675         goya->hw_cap_initialized |= HW_CAP_MMU;
2676
2677         /* init MMU cache manage page */
2678         WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2679         WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
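        /*
         * (Editor's note: judging by the register names, the pair above takes
         * bits [39:8] and [49:40] of the cache-management address, hence the
         * two shift amounts.)
         */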
2680
2681         /* Remove follower feature due to performance bug */
2682         WREG32_AND(mmSTLB_STLB_FEATURE_EN,
2683                         (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
2684
2685         hdev->asic_funcs->mmu_invalidate_cache(hdev, true);
2686
2687         WREG32(mmMMU_MMU_ENABLE, 1);
2688         WREG32(mmMMU_SPI_MASK, 0xF);
2689
2690         return 0;
2691
2692 err:
2693         return rc;
2694 }
2695
2696 /*
2697  * goya_hw_init - Goya hardware initialization code
2698  *
2699  * @hdev: pointer to hl_device structure
2700  *
2701  * Returns 0 on success
2702  *
2703  */
2704 static int goya_hw_init(struct hl_device *hdev)
2705 {
2706         struct asic_fixed_properties *prop = &hdev->asic_prop;
2707         u32 val;
2708         int rc;
2709
2710         dev_info(hdev->dev, "Starting initialization of H/W\n");
2711
2712         /* Perform read from the device to make sure device is up */
2713         val = RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2714
2715         /*
2716          * Let's mark in the H/W that we have reached this point. We check
2717          * this value in the reset_before_init function to understand whether
2718          * we need to reset the chip before doing H/W init. This register is
2719          * cleared by the H/W upon H/W reset
2720          */
2721         WREG32(mmPSOC_GLOBAL_CONF_APP_STATUS, HL_DEVICE_HW_STATE_DIRTY);
2722
2723         rc = goya_init_cpu(hdev, GOYA_CPU_TIMEOUT_USEC);
2724         if (rc) {
2725                 dev_err(hdev->dev, "failed to initialize CPU\n");
2726                 return rc;
2727         }
2728
2729         goya_tpc_mbist_workaround(hdev);
2730
2731         goya_init_golden_registers(hdev);
2732
2733         /*
2734          * After CPU initialization is finished, change DDR bar mapping inside
2735          * iATU to point to the start address of the MMU page tables
2736          */
2737         rc = goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE +
2738                 (MMU_PAGE_TABLES_ADDR & ~(prop->dram_pci_bar_size - 0x1ull)));
2739         if (rc) {
2740                 dev_err(hdev->dev,
2741                         "failed to map DDR bar to MMU page tables\n");
2742                 return rc;
2743         }
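
        /*
         * Editor's worked example (BAR size hypothetical): with a 512MB DDR
         * BAR, ~(dram_pci_bar_size - 1) clears the low 29 bits, so the iATU
         * window is placed at the 512MB-aligned base containing
         * MMU_PAGE_TABLES_ADDR and the page tables sit at their offset within
         * that window.
         */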
2744
2745         rc = goya_mmu_init(hdev);
2746         if (rc)
2747                 return rc;
2748
2749         goya_init_security(hdev);
2750
2751         goya_init_dma_qmans(hdev);
2752
2753         goya_init_mme_qmans(hdev);
2754
2755         goya_init_tpc_qmans(hdev);
2756
2757         /* MSI-X must be enabled before CPU queues are initialized */
2758         rc = goya_enable_msix(hdev);
2759         if (rc)
2760                 goto disable_queues;
2761
2762         rc = goya_init_cpu_queues(hdev);
2763         if (rc) {
2764                 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
2765                         rc);
2766                 goto disable_msix;
2767         }
2768
2769         /* CPU initialization is finished, we can now move to 48-bit DMA mask */
2770         rc = pci_set_dma_mask(hdev->pdev, DMA_BIT_MASK(48));
2771         if (rc) {
2772                 dev_warn(hdev->dev, "Unable to set pci dma mask to 48 bits\n");
2773                 rc = pci_set_dma_mask(hdev->pdev, DMA_BIT_MASK(32));
2774                 if (rc) {
2775                         dev_err(hdev->dev,
2776                                 "Unable to set pci dma mask to 32 bits\n");
2777                         goto disable_pci_access;
2778                 }
2779         }
2780
2781         rc = pci_set_consistent_dma_mask(hdev->pdev, DMA_BIT_MASK(48));
2782         if (rc) {
2783                 dev_warn(hdev->dev,
2784                         "Unable to set pci consistent dma mask to 48 bits\n");
2785                 rc = pci_set_consistent_dma_mask(hdev->pdev, DMA_BIT_MASK(32));
2786                 if (rc) {
2787                         dev_err(hdev->dev,
2788                                 "Unable to set pci consistent dma mask to 32 bits\n");
2789                         goto disable_pci_access;
2790                 }
2791         }
2792
2793         /* Perform read from the device to flush all MSI-X configuration */
2794         val = RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2795
2796         return 0;
2797
2798 disable_pci_access:
2799         goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
2800 disable_msix:
2801         goya_disable_msix(hdev);
2802 disable_queues:
2803         goya_disable_internal_queues(hdev);
2804         goya_disable_external_queues(hdev);
2805
2806         return rc;
2807 }
2808
2809 /*
2810  * goya_hw_fini - Goya hardware tear-down code
2811  *
2812  * @hdev: pointer to hl_device structure
2813  * @hard_reset: should we do hard reset to all engines or just reset the
2814  *              compute/dma engines
2815  */
2816 static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
2817 {
2818         struct goya_device *goya = hdev->asic_specific;
2819         u32 reset_timeout_ms, status;
2820
2821         if (hdev->pldm)
2822                 reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
2823         else
2824                 reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
2825
2826         if (hard_reset) {
2827                 goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
2828                 goya_disable_clk_rlx(hdev);
2829                 goya_set_pll_refclk(hdev);
2830
2831                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
2832                 dev_info(hdev->dev,
2833                         "Issued HARD reset command, going to wait %dms\n",
2834                         reset_timeout_ms);
2835         } else {
2836                 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
2837                 dev_info(hdev->dev,
2838                         "Issued SOFT reset command, going to wait %dms\n",
2839                         reset_timeout_ms);
2840         }
2841
2842         /*
2843          * After a hard reset we can't poll the BTM_FSM register while the
2844          * PSOC itself is still in reset, so for either reset type we simply
2845          * wait the full timeout and then check that the reset was deasserted
2846          */
2847         msleep(reset_timeout_ms);
2848
2849         status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
2850         if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
2851                 dev_err(hdev->dev,
2852                         "Timeout while waiting for device to reset 0x%x\n",
2853                         status);
2854
2855         if (!hard_reset) {
2856                 goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
2857                                                 HW_CAP_GOLDEN | HW_CAP_TPC);
2858                 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2859                                 GOYA_ASYNC_EVENT_ID_SOFT_RESET);
2860                 return;
2861         }
2862
2863         /* Chicken bit to re-initiate boot sequencer flow */
2864         WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START,
2865                 1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT);
2866         /* Move boot manager FSM to pre boot sequencer init state */
2867         WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
2868                         0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);
2869
2870         goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
2871                                         HW_CAP_DDR_0 | HW_CAP_DDR_1 |
2872                                         HW_CAP_DMA | HW_CAP_MME |
2873                                         HW_CAP_MMU | HW_CAP_TPC_MBIST |
2874                                         HW_CAP_GOLDEN | HW_CAP_TPC);
2875         memset(goya->events_stat, 0, sizeof(goya->events_stat));
2876
2877         if (!hdev->pldm) {
2878                 int rc;
2879                 /* In case we are running inside a VM and the VM is
2880                  * shutting down, we need to make sure CPU boot-loader
2881                  * is running before we can continue the VM shutdown.
2882                  * That is because the VM will send an FLR signal that
2883                  * we must answer
2884                  */
2885                 dev_info(hdev->dev,
2886                         "Going to wait up to %ds for CPU boot loader\n",
2887                         GOYA_CPU_TIMEOUT_USEC / 1000 / 1000);
2888
2889                 rc = hl_poll_timeout(
2890                         hdev,
2891                         mmPSOC_GLOBAL_CONF_WARM_REBOOT,
2892                         status,
2893                         (status == CPU_BOOT_STATUS_DRAM_RDY),
2894                         10000,
2895                         GOYA_CPU_TIMEOUT_USEC);
2896                 if (rc)
2897                         dev_err(hdev->dev,
2898                                 "failed to wait for CPU boot loader\n");
2899         }
2900 }
2901
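/*
 * goya_suspend - Goya suspend code
 *
 * @hdev: pointer to hl_device structure
 *
 * Stop the internal and external queues and ask the device CPU to stop
 * issuing transactions towards the host over PCI
 */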
2902 int goya_suspend(struct hl_device *hdev)
2903 {
2904         int rc;
2905
2906         rc = goya_stop_internal_queues(hdev);
2907
2908         if (rc) {
2909                 dev_err(hdev->dev, "failed to stop internal queues\n");
2910                 return rc;
2911         }
2912
2913         rc = goya_stop_external_queues(hdev);
2914
2915         if (rc) {
2916                 dev_err(hdev->dev, "failed to stop external queues\n");
2917                 return rc;
2918         }
2919
2920         rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
2921         if (rc)
2922                 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
2923
2924         return rc;
2925 }
2926
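/*
 * goya_resume - Goya resume code
 *
 * @hdev: pointer to hl_device structure
 *
 * Resume the external and internal queues and re-enable PCI access from
 * the device CPU
 */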
2927 int goya_resume(struct hl_device *hdev)
2928 {
2929         int rc;
2930
2931         goya_resume_external_queues(hdev);
2932         goya_resume_internal_queues(hdev);
2933
2934         rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
2935         if (rc)
2936                 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
2937         return rc;
2938 }
2939
2940 static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
2941                 u64 kaddress, phys_addr_t paddress, u32 size)
2942 {
2943         int rc;
2944
2945         vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
2946                         VM_DONTCOPY | VM_NORESERVE;
2947
2948         rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
2949                                 size, vma->vm_page_prot);
2950         if (rc)
2951                 dev_err(hdev->dev, "remap_pfn_range error %d\n", rc);
2952
2953         return rc;
2954 }
2955
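/*
 * goya_ring_doorbell - write a new PI value to a H/W queue's doorbell
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: ID of the H/W queue whose PI register should be written
 * @pi: new producer index
 *
 * For the CPU queue, an interrupt is also raised towards the device CPU so
 * it will notice the PI update
 */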
2956 static void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
2957 {
2958         u32 db_reg_offset, db_value;
2959         bool invalid_queue = false;
2960
2961         switch (hw_queue_id) {
2962         case GOYA_QUEUE_ID_DMA_0:
2963                 db_reg_offset = mmDMA_QM_0_PQ_PI;
2964                 break;
2965
2966         case GOYA_QUEUE_ID_DMA_1:
2967                 db_reg_offset = mmDMA_QM_1_PQ_PI;
2968                 break;
2969
2970         case GOYA_QUEUE_ID_DMA_2:
2971                 db_reg_offset = mmDMA_QM_2_PQ_PI;
2972                 break;
2973
2974         case GOYA_QUEUE_ID_DMA_3:
2975                 db_reg_offset = mmDMA_QM_3_PQ_PI;
2976                 break;
2977
2978         case GOYA_QUEUE_ID_DMA_4:
2979                 db_reg_offset = mmDMA_QM_4_PQ_PI;
2980                 break;
2981
2982         case GOYA_QUEUE_ID_CPU_PQ:
2983                 if (hdev->cpu_queues_enable)
2984                         db_reg_offset = mmCPU_IF_PF_PQ_PI;
2985                 else
2986                         invalid_queue = true;
2987                 break;
2988
2989         case GOYA_QUEUE_ID_MME:
2990                 db_reg_offset = mmMME_QM_PQ_PI;
2991                 break;
2992
2993         case GOYA_QUEUE_ID_TPC0:
2994                 db_reg_offset = mmTPC0_QM_PQ_PI;
2995                 break;
2996
2997         case GOYA_QUEUE_ID_TPC1:
2998                 db_reg_offset = mmTPC1_QM_PQ_PI;
2999                 break;
3000
3001         case GOYA_QUEUE_ID_TPC2:
3002                 db_reg_offset = mmTPC2_QM_PQ_PI;
3003                 break;
3004
3005         case GOYA_QUEUE_ID_TPC3:
3006                 db_reg_offset = mmTPC3_QM_PQ_PI;
3007                 break;
3008
3009         case GOYA_QUEUE_ID_TPC4:
3010                 db_reg_offset = mmTPC4_QM_PQ_PI;
3011                 break;
3012
3013         case GOYA_QUEUE_ID_TPC5:
3014                 db_reg_offset = mmTPC5_QM_PQ_PI;
3015                 break;
3016
3017         case GOYA_QUEUE_ID_TPC6:
3018                 db_reg_offset = mmTPC6_QM_PQ_PI;
3019                 break;
3020
3021         case GOYA_QUEUE_ID_TPC7:
3022                 db_reg_offset = mmTPC7_QM_PQ_PI;
3023                 break;
3024
3025         default:
3026                 invalid_queue = true;
3027         }
3028
3029         if (invalid_queue) {
3030                 /* Should never get here */
3031                 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3032                         hw_queue_id);
3033                 return;
3034         }
3035
3036         db_value = pi;
3037
3038         /* ring the doorbell */
3039         WREG32(db_reg_offset, db_value);
3040
3041         if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ)
3042                 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3043                                 GOYA_ASYNC_EVENT_ID_PI_UPDATE);
3044 }
3045
3046 void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val)
3047 {
3048         /* Not needed in Goya */
3049 }
3050
3051 static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3052                                         dma_addr_t *dma_handle, gfp_t flags)
3053 {
3054         return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
3055 }
3056
3057 static void goya_dma_free_coherent(struct hl_device *hdev, size_t size,
3058                                         void *cpu_addr, dma_addr_t dma_handle)
3059 {
3060         dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
3061 }
3062
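/*
 * goya_get_int_queue_base - return the base of an internal H/W queue
 *
 * @hdev: pointer to hl_device structure
 * @queue_id: ID of the internal (MME/TPC) H/W queue
 * @dma_handle: returned device address of the queue base
 * @queue_len: returned length of the queue
 *
 * Internal queues reside at fixed offsets inside the on-chip SRAM, so the
 * kernel virtual address (through the SRAM/CFG BAR) and the device address
 * are both derived from the same offset
 */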
3063 void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
3064                                 dma_addr_t *dma_handle, u16 *queue_len)
3065 {
3066         void *base;
3067         u32 offset;
3068
3069         *dma_handle = hdev->asic_prop.sram_base_address;
3070
3071         base = hdev->pcie_bar[SRAM_CFG_BAR_ID];
3072
3073         switch (queue_id) {
3074         case GOYA_QUEUE_ID_MME:
3075                 offset = MME_QMAN_BASE_OFFSET;
3076                 *queue_len = MME_QMAN_LENGTH;
3077                 break;
3078         case GOYA_QUEUE_ID_TPC0:
3079                 offset = TPC0_QMAN_BASE_OFFSET;
3080                 *queue_len = TPC_QMAN_LENGTH;
3081                 break;
3082         case GOYA_QUEUE_ID_TPC1:
3083                 offset = TPC1_QMAN_BASE_OFFSET;
3084                 *queue_len = TPC_QMAN_LENGTH;
3085                 break;
3086         case GOYA_QUEUE_ID_TPC2:
3087                 offset = TPC2_QMAN_BASE_OFFSET;
3088                 *queue_len = TPC_QMAN_LENGTH;
3089                 break;
3090         case GOYA_QUEUE_ID_TPC3:
3091                 offset = TPC3_QMAN_BASE_OFFSET;
3092                 *queue_len = TPC_QMAN_LENGTH;
3093                 break;
3094         case GOYA_QUEUE_ID_TPC4:
3095                 offset = TPC4_QMAN_BASE_OFFSET;
3096                 *queue_len = TPC_QMAN_LENGTH;
3097                 break;
3098         case GOYA_QUEUE_ID_TPC5:
3099                 offset = TPC5_QMAN_BASE_OFFSET;
3100                 *queue_len = TPC_QMAN_LENGTH;
3101                 break;
3102         case GOYA_QUEUE_ID_TPC6:
3103                 offset = TPC6_QMAN_BASE_OFFSET;
3104                 *queue_len = TPC_QMAN_LENGTH;
3105                 break;
3106         case GOYA_QUEUE_ID_TPC7:
3107                 offset = TPC7_QMAN_BASE_OFFSET;
3108                 *queue_len = TPC_QMAN_LENGTH;
3109                 break;
3110         default:
3111                 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3112                 return NULL;
3113         }
3114
3115         base += offset;
3116         *dma_handle += offset;
3117
3118         return base;
3119 }
3120
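/*
 * goya_send_job_on_qman0 - submit a KMD job on the secured DMA 0 QMAN
 *
 * @hdev: pointer to hl_device structure
 * @job: pointer to the job to submit
 *
 * The CB is sent without a completion queue. Instead, the last MSG_PROT
 * packet of the CB writes a fence value to host memory and the function
 * polls that location until the value appears or a timeout expires
 */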
3121 static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
3122 {
3123         struct goya_device *goya = hdev->asic_specific;
3124         struct packet_msg_prot *fence_pkt;
3125         u32 *fence_ptr;
3126         dma_addr_t fence_dma_addr;
3127         struct hl_cb *cb;
3128         u32 tmp;
3129         int rc;
3130
3131         if (!hdev->asic_funcs->is_device_idle(hdev)) {
3132                 dev_err_ratelimited(hdev->dev,
3133                         "Can't send KMD job on QMAN0 if device is not idle\n");
3134                 return -EFAULT;
3135         }
3136
3137         fence_ptr = hdev->asic_funcs->dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3138                                                         &fence_dma_addr);
3139         if (!fence_ptr) {
3140                 dev_err(hdev->dev,
3141                         "Failed to allocate fence memory for QMAN0\n");
3142                 return -ENOMEM;
3143         }
3144
3145         *fence_ptr = 0;
3146
3147         if (goya->hw_cap_initialized & HW_CAP_MMU) {
3148                 WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
3149                 RREG32(mmDMA_QM_0_GLBL_PROT);
3150         }
3151
3152         /*
3153          * The Goya CS parser saves space for 2 x packet_msg_prot at the end of
3154          * the CB. Synchronized kernel jobs only need space for 1 packet_msg_prot
3155          */
3156         job->job_cb_size -= sizeof(struct packet_msg_prot);
3157
3158         cb = job->patched_cb;
3159
3160         fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
3161                         job->job_cb_size - sizeof(struct packet_msg_prot));
3162
3163         fence_pkt->ctl = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3164                         (1 << GOYA_PKT_CTL_EB_SHIFT) |
3165                         (1 << GOYA_PKT_CTL_MB_SHIFT);
3166         fence_pkt->value = GOYA_QMAN0_FENCE_VAL;
3167         fence_pkt->addr = fence_dma_addr +
3168                         hdev->asic_prop.host_phys_base_address;
3169
3170         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
3171                                         job->job_cb_size, cb->bus_address);
3172         if (rc) {
3173                 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
3174                 goto free_fence_ptr;
3175         }
3176
3177         rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) fence_ptr,
3178                                         HL_DEVICE_TIMEOUT_USEC, &tmp);
3179
3180         hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);
3181
3182         if ((rc) || (tmp != GOYA_QMAN0_FENCE_VAL)) {
3183                 dev_err(hdev->dev, "QMAN0 Job hasn't finished in time\n");
3184                 rc = -ETIMEDOUT;
3185         }
3186
3187 free_fence_ptr:
3188         hdev->asic_funcs->dma_pool_free(hdev, (void *) fence_ptr,
3189                                         fence_dma_addr);
3190
3191         if (goya->hw_cap_initialized & HW_CAP_MMU) {
3192                 WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);
3193                 RREG32(mmDMA_QM_0_GLBL_PROT);
3194         }
3195
3196         return rc;
3197 }
3198
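/*
 * goya_send_cpu_message - send a message to the device CPU
 *
 * @hdev: pointer to hl_device structure
 * @msg: pointer to the ArmCP packet to send
 * @len: length of the packet in bytes
 * @timeout: timeout (in usec) to wait for the packet's fence
 * @result: returned value of the packet, if the caller asked for one
 *
 * The packet is copied into CPU-accessible DMA memory, sent on the CPU PQ
 * without completion, and its fence field is then polled to detect that
 * the device CPU has processed it
 */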
3199 int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
3200                                 u32 timeout, long *result)
3201 {
3202         struct goya_device *goya = hdev->asic_specific;
3203         struct armcp_packet *pkt;
3204         dma_addr_t pkt_dma_addr;
3205         u32 tmp;
3206         int rc = 0;
3207
3208         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
3209                 if (result)
3210                         *result = 0;
3211                 return 0;
3212         }
3213
3214         if (len > CPU_CB_SIZE) {
3215                 dev_err(hdev->dev, "Invalid CPU message size of %d bytes\n",
3216                         len);
3217                 return -ENOMEM;
3218         }
3219
3220         pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
3221                                                                 &pkt_dma_addr);
3222         if (!pkt) {
3223                 dev_err(hdev->dev,
3224                         "Failed to allocate DMA memory for packet to CPU\n");
3225                 return -ENOMEM;
3226         }
3227
3228         memcpy(pkt, msg, len);
3229
3230         mutex_lock(&hdev->send_cpu_message_lock);
3231
3232         if (hdev->disabled)
3233                 goto out;
3234
3235         rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_CPU_PQ, len,
3236                         pkt_dma_addr);
3237         if (rc) {
3238                 dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
3239                 goto out;
3240         }
3241
3242         rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) &pkt->fence,
3243                                         timeout, &tmp);
3244
3245         hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_CPU_PQ);
3246
3247         if (rc == -ETIMEDOUT) {
3248                 dev_err(hdev->dev,
3249                         "Timeout while waiting for CPU packet fence\n");
3250                 goto out;
3251         }
3252
3253         if (tmp == ARMCP_PACKET_FENCE_VAL) {
3254                 rc = (pkt->ctl & ARMCP_PKT_CTL_RC_MASK) >>
3255                                                 ARMCP_PKT_CTL_RC_SHIFT;
3256                 if (rc) {
3257                         dev_err(hdev->dev,
3258                                 "F/W ERROR %d for CPU packet %d\n",
3259                                 rc, (pkt->ctl & ARMCP_PKT_CTL_OPCODE_MASK)
3260                                                 >> ARMCP_PKT_CTL_OPCODE_SHIFT);
3261                         rc = -EINVAL;
3262                 } else if (result) {
3263                         *result = pkt->result;
3264                 }
3265         } else {
3266                 dev_err(hdev->dev, "CPU packet wrong fence value\n");
3267                 rc = -EINVAL;
3268         }
3269
3270 out:
3271         mutex_unlock(&hdev->send_cpu_message_lock);
3272
3273         hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt);
3274
3275         return rc;
3276 }
3277
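/*
 * goya_test_queue - sanity-check a single external H/W queue
 *
 * @hdev: pointer to hl_device structure
 * @hw_queue_id: ID of the external H/W queue to test
 *
 * A MSG_PROT packet instructing the engine to write a known fence value to
 * host memory is sent on the queue. The test passes only if that value is
 * observed in host memory before the timeout expires
 */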
3278 int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3279 {
3280         struct packet_msg_prot *fence_pkt;
3281         dma_addr_t pkt_dma_addr;
3282         u32 fence_val, tmp;
3283         dma_addr_t fence_dma_addr;
3284         u32 *fence_ptr;
3285         int rc;
3286
3287         fence_val = GOYA_QMAN0_FENCE_VAL;
3288
3289         fence_ptr = hdev->asic_funcs->dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3290                                                         &fence_dma_addr);
3291         if (!fence_ptr) {
3292                 dev_err(hdev->dev,
3293                         "Failed to allocate memory for queue testing\n");
3294                 return -ENOMEM;
3295         }
3296
3297         *fence_ptr = 0;
3298
3299         fence_pkt = hdev->asic_funcs->dma_pool_zalloc(hdev,
3300                                         sizeof(struct packet_msg_prot),
3301                                         GFP_KERNEL, &pkt_dma_addr);
3302         if (!fence_pkt) {
3303                 dev_err(hdev->dev,
3304                         "Failed to allocate packet for queue testing\n");
3305                 rc = -ENOMEM;
3306                 goto free_fence_ptr;
3307         }
3308
3309         fence_pkt->ctl = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3310                         (1 << GOYA_PKT_CTL_EB_SHIFT) |
3311                         (1 << GOYA_PKT_CTL_MB_SHIFT);
3312         fence_pkt->value = fence_val;
3313         fence_pkt->addr = fence_dma_addr +
3314                                 hdev->asic_prop.host_phys_base_address;
3315
3316         rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3317                                         sizeof(struct packet_msg_prot),
3318                                         pkt_dma_addr);
3319         if (rc) {
3320                 dev_err(hdev->dev,
3321                         "Failed to send fence packet\n");
3322                 goto free_pkt;
3323         }
3324
3325         rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) fence_ptr,
3326                                         GOYA_TEST_QUEUE_WAIT_USEC, &tmp);
3327
3328         hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3329
3330         if ((!rc) && (tmp == fence_val)) {
3331                 dev_info(hdev->dev,
3332                         "queue test on H/W queue %d succeeded\n",
3333                         hw_queue_id);
3334         } else {
3335                 dev_err(hdev->dev,
3336                         "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3337                         hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3338                 rc = -EINVAL;
3339         }
3340
3341 free_pkt:
3342         hdev->asic_funcs->dma_pool_free(hdev, (void *) fence_pkt,
3343                                         pkt_dma_addr);
3344 free_fence_ptr:
3345         hdev->asic_funcs->dma_pool_free(hdev, (void *) fence_ptr,
3346                                         fence_dma_addr);
3347         return rc;
3348 }
3349
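/*
 * Illustrative sketch (not part of the driver): a minimal caller-side
 * sanity check of a single DMA queue after reset. The helper name is made
 * up for illustration and the snippet is kept under #if 0 as documentation
 * only; goya_test_queues() below is the real, full self-test.
 */
#if 0
static int goya_sanity_check_dma0_example(struct hl_device *hdev)
{
        int rc;

        /* Send a fence packet on DMA queue 0 and wait for it to land */
        rc = goya_test_queue(hdev, GOYA_QUEUE_ID_DMA_0);
        if (rc)
                dev_err(hdev->dev, "DMA 0 sanity check failed, %d\n", rc);

        return rc;
}
#endif
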
3350 int goya_test_cpu_queue(struct hl_device *hdev)
3351 {
3352         struct armcp_packet test_pkt;
3353         long result;
3354         int rc;
3355
3356         /* cpu_queues_enable flag is always checked in send cpu message */
3357
3358         memset(&test_pkt, 0, sizeof(test_pkt));
3359
3360         test_pkt.ctl = ARMCP_PACKET_TEST << ARMCP_PKT_CTL_OPCODE_SHIFT;
3361         test_pkt.value = ARMCP_PACKET_FENCE_VAL;
3362
3363         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
3364                         sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
3365
3366         if (!rc) {
3367                 if (result == ARMCP_PACKET_FENCE_VAL)
3368                         dev_info(hdev->dev,
3369                                 "queue test on CPU queue succeeded\n");
3370                 else
3371                         dev_err(hdev->dev,
3372                                 "CPU queue test failed (0x%08lX)\n", result);
3373         } else {
3374                 dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc);
3375         }
3376
3377         return rc;
3378 }
3379
3380 static int goya_test_queues(struct hl_device *hdev)
3381 {
3382         struct goya_device *goya = hdev->asic_specific;
3383         int i, rc, ret_val = 0;
3384
3385         for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
3386                 rc = goya_test_queue(hdev, i);
3387                 if (rc)
3388                         ret_val = -EINVAL;
3389         }
3390
3391         if (hdev->cpu_queues_enable) {
3392                 rc = goya->test_cpu_queue(hdev);
3393                 if (rc)
3394                         ret_val = -EINVAL;
3395         }
3396
3397         return ret_val;
3398 }
3399
3400 static void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3401                                         gfp_t mem_flags, dma_addr_t *dma_handle)
3402 {
3403         if (size > GOYA_DMA_POOL_BLK_SIZE)
3404                 return NULL;
3405
3406         return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3407 }
3408
3409 static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
3410                                 dma_addr_t dma_addr)
3411 {
3412         dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
3413 }
3414
3415 static void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
3416                                         size_t size, dma_addr_t *dma_handle)
3417 {
3418         u64 kernel_addr;
3419
3420         /* roundup to CPU_PKT_SIZE */
3421         size = (size + (CPU_PKT_SIZE - 1)) & CPU_PKT_MASK;
3422
3423         kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
3424
3425         *dma_handle = hdev->cpu_accessible_dma_address +
3426                 (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem);
3427
3428         return (void *) (uintptr_t) kernel_addr;
3429 }
3430
3431 static void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev,
3432                                                 size_t size, void *vaddr)
3433 {
3434         /* roundup to CPU_PKT_SIZE */
3435         size = (size + (CPU_PKT_SIZE - 1)) & CPU_PKT_MASK;
3436
3437         gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr,
3438                         size);
3439 }
3440
3441 static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sg,
3442                                 int nents, enum dma_data_direction dir)
3443 {
3444         if (!dma_map_sg(&hdev->pdev->dev, sg, nents, dir))
3445                 return -ENOMEM;
3446
3447         return 0;
3448 }
3449
3450 static void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sg,
3451                                 int nents, enum dma_data_direction dir)
3452 {
3453         dma_unmap_sg(&hdev->pdev->dev, sg, nents, dir);
3454 }
3455
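/*
 * goya_get_dma_desc_list_size - compute the patched size of a host DMA packet
 *
 * @hdev: pointer to hl_device structure
 * @sgt: scatter-gather table of the pinned host memory
 *
 * Walk the DMA-mapped scatter-gather list, merging entries that are
 * physically contiguous as long as the combined length doesn't exceed
 * DMA_MAX_TRANSFER_SIZE. Each resulting descriptor will become one
 * packet_lin_dma in the patched CB
 */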
3456 u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
3457 {
3458         struct scatterlist *sg, *sg_next_iter;
3459         u32 count, dma_desc_cnt;
3460         u64 len, len_next;
3461         dma_addr_t addr, addr_next;
3462
3463         dma_desc_cnt = 0;
3464
3465         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3466
3467                 len = sg_dma_len(sg);
3468                 addr = sg_dma_address(sg);
3469
3470                 if (len == 0)
3471                         break;
3472
3473                 while ((count + 1) < sgt->nents) {
3474                         sg_next_iter = sg_next(sg);
3475                         len_next = sg_dma_len(sg_next_iter);
3476                         addr_next = sg_dma_address(sg_next_iter);
3477
3478                         if (len_next == 0)
3479                                 break;
3480
3481                         if ((addr + len == addr_next) &&
3482                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3483                                 len += len_next;
3484                                 count++;
3485                                 sg = sg_next_iter;
3486                         } else {
3487                                 break;
3488                         }
3489                 }
3490
3491                 dma_desc_cnt++;
3492         }
3493
3494         return dma_desc_cnt * sizeof(struct packet_lin_dma);
3495 }
3496
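/*
 * goya_pin_memory_before_cs - pin the host memory referenced by a DMA packet
 *
 * @hdev: pointer to hl_device structure
 * @parser: pointer to the CS parser
 * @user_dma_pkt: the user's LIN_DMA packet that references host memory
 * @addr: host virtual address to pin
 * @dir: DMA direction of the transfer
 *
 * If the range is already pinned for this job, only its patched size is
 * accounted for. Otherwise the memory is pinned, DMA-mapped and added to
 * the job's userptr list so it will be released when the job completes
 */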
3497 static int goya_pin_memory_before_cs(struct hl_device *hdev,
3498                                 struct hl_cs_parser *parser,
3499                                 struct packet_lin_dma *user_dma_pkt,
3500                                 u64 addr, enum dma_data_direction dir)
3501 {
3502         struct hl_userptr *userptr;
3503         int rc;
3504
3505         if (hl_userptr_is_pinned(hdev, addr, user_dma_pkt->tsize,
3506                         parser->job_userptr_list, &userptr))
3507                 goto already_pinned;
3508
3509         userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3510         if (!userptr)
3511                 return -ENOMEM;
3512
3513         rc = hl_pin_host_memory(hdev, addr, user_dma_pkt->tsize, userptr);
3514         if (rc)
3515                 goto free_userptr;
3516
3517         list_add_tail(&userptr->job_node, parser->job_userptr_list);
3518
3519         rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3520                                         userptr->sgt->nents, dir);
3521         if (rc) {
3522                 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3523                 goto unpin_memory;
3524         }
3525
3526         userptr->dma_mapped = true;
3527         userptr->dir = dir;
3528
3529 already_pinned:
3530         parser->patched_cb_size +=
3531                         goya_get_dma_desc_list_size(hdev, userptr->sgt);
3532
3533         return 0;
3534
3535 unpin_memory:
3536         hl_unpin_host_memory(hdev, userptr);
3537 free_userptr:
3538         kfree(userptr);
3539         return rc;
3540 }
3541
3542 static int goya_validate_dma_pkt_host(struct hl_device *hdev,
3543                                 struct hl_cs_parser *parser,
3544                                 struct packet_lin_dma *user_dma_pkt)
3545 {
3546         u64 device_memory_addr, addr;
3547         enum dma_data_direction dir;
3548         enum goya_dma_direction user_dir;
3549         bool sram_addr = true;
3550         bool skip_host_mem_pin = false;
3551         bool user_memset;
3552         int rc = 0;
3553
3554         user_dir = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3555                         GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3556
3557         user_memset = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3558                         GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3559
3560         switch (user_dir) {
3561         case DMA_HOST_TO_DRAM:
3562                 dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
3563                 dir = DMA_TO_DEVICE;
3564                 sram_addr = false;
3565                 addr = user_dma_pkt->src_addr;
3566                 device_memory_addr = user_dma_pkt->dst_addr;
3567                 if (user_memset)
3568                         skip_host_mem_pin = true;
3569                 break;
3570
3571         case DMA_DRAM_TO_HOST:
3572                 dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
3573                 dir = DMA_FROM_DEVICE;
3574                 sram_addr = false;
3575                 addr = user_dma_pkt->dst_addr;
3576                 device_memory_addr = user_dma_pkt->src_addr;
3577                 break;
3578
3579         case DMA_HOST_TO_SRAM:
3580                 dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
3581                 dir = DMA_TO_DEVICE;
3582                 addr = user_dma_pkt->src_addr;
3583                 device_memory_addr = user_dma_pkt->dst_addr;
3584                 if (user_memset)
3585                         skip_host_mem_pin = true;
3586                 break;
3587
3588         case DMA_SRAM_TO_HOST:
3589                 dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
3590                 dir = DMA_FROM_DEVICE;
3591                 addr = user_dma_pkt->dst_addr;
3592                 device_memory_addr = user_dma_pkt->src_addr;
3593                 break;
3594         default:
3595                 dev_err(hdev->dev, "DMA direction is undefined\n");
3596                 return -EFAULT;
3597         }
3598
3599         if (parser->ctx_id != HL_KERNEL_ASID_ID) {
3600                 if (sram_addr) {
3601                         if (!hl_mem_area_inside_range(device_memory_addr,
3602                                         user_dma_pkt->tsize,
3603                                         hdev->asic_prop.sram_user_base_address,
3604                                         hdev->asic_prop.sram_end_address)) {
3605
3606                                 dev_err(hdev->dev,
3607                                         "SRAM address 0x%llx + 0x%x is invalid\n",
3608                                         device_memory_addr,
3609                                         user_dma_pkt->tsize);
3610                                 return -EFAULT;
3611                         }
3612                 } else {
3613                         if (!hl_mem_area_inside_range(device_memory_addr,
3614                                         user_dma_pkt->tsize,
3615                                         hdev->asic_prop.dram_user_base_address,
3616                                         hdev->asic_prop.dram_end_address)) {
3617
3618                                 dev_err(hdev->dev,
3619                                         "DRAM address 0x%llx + 0x%x is invalid\n",
3620                                         device_memory_addr,
3621                                         user_dma_pkt->tsize);
3622                                 return -EFAULT;
3623                         }
3624                 }
3625         }
3626
3627         if (skip_host_mem_pin) {
3628                 parser->patched_cb_size += sizeof(*user_dma_pkt);
3629         } else {
3630                 if ((dir == DMA_TO_DEVICE) &&
3631                                 (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
3632                         dev_err(hdev->dev,
3633                                 "Can't DMA from host on queue other then 1\n");
3634                         return -EFAULT;
3635                 }
3636
3637                 rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3638                                                 addr, dir);
3639         }
3640
3641         return rc;
3642 }
3643
3644 static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
3645                                 struct hl_cs_parser *parser,
3646                                 struct packet_lin_dma *user_dma_pkt)
3647 {
3648         u64 sram_memory_addr, dram_memory_addr;
3649         enum goya_dma_direction user_dir;
3650
3651         user_dir = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3652                         GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3653
3654         if (user_dir == DMA_DRAM_TO_SRAM) {
3655                 dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
3656                 dram_memory_addr = user_dma_pkt->src_addr;
3657                 sram_memory_addr = user_dma_pkt->dst_addr;
3658         } else {
3659                 dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
3660                 sram_memory_addr = user_dma_pkt->src_addr;
3661                 dram_memory_addr = user_dma_pkt->dst_addr;
3662         }
3663
3664         if (!hl_mem_area_inside_range(sram_memory_addr, user_dma_pkt->tsize,
3665                                 hdev->asic_prop.sram_user_base_address,
3666                                 hdev->asic_prop.sram_end_address)) {
3667                 dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
3668                         sram_memory_addr, user_dma_pkt->tsize);
3669                 return -EFAULT;
3670         }
3671
3672         if (!hl_mem_area_inside_range(dram_memory_addr, user_dma_pkt->tsize,
3673                                 hdev->asic_prop.dram_user_base_address,
3674                                 hdev->asic_prop.dram_end_address)) {
3675                 dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
3676                         dram_memory_addr, user_dma_pkt->tsize);
3677                 return -EFAULT;
3678         }
3679
3680         parser->patched_cb_size += sizeof(*user_dma_pkt);
3681
3682         return 0;
3683 }
3684
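/*
 * goya_validate_dma_pkt_no_mmu - validate a user LIN_DMA packet (MMU disabled)
 *
 * @hdev: pointer to hl_device structure
 * @parser: pointer to the CS parser
 * @user_dma_pkt: the user's LIN_DMA packet
 *
 * Zero-sized transfers are rejected because of a H/W bug, and the packet is
 * then dispatched to the device-only or host-involved validation flow
 * according to the DMA direction in its ctl word
 */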
3685 static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3686                                 struct hl_cs_parser *parser,
3687                                 struct packet_lin_dma *user_dma_pkt)
3688 {
3689         enum goya_dma_direction user_dir;
3690         int rc;
3691
3692         dev_dbg(hdev->dev, "DMA packet details:\n");
3693         dev_dbg(hdev->dev, "source == 0x%llx\n", user_dma_pkt->src_addr);
3694         dev_dbg(hdev->dev, "destination == 0x%llx\n", user_dma_pkt->dst_addr);
3695         dev_dbg(hdev->dev, "size == %u\n", user_dma_pkt->tsize);
3696
3697         user_dir = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3698                         GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3699
3700         /*
3701          * Special handling for DMA with size 0. The H/W has a bug where
3702          * this can cause the QMAN DMA to get stuck, so block it here.
3703          */
3704         if (user_dma_pkt->tsize == 0) {
3705                 dev_err(hdev->dev,
3706                         "Got DMA with size 0, might reset the device\n");
3707                 return -EINVAL;
3708         }
3709
3710         if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM))
3711                 rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
3712         else
3713                 rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);
3714
3715         return rc;
3716 }
3717
3718 static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
3719                                 struct hl_cs_parser *parser,
3720                                 struct packet_lin_dma *user_dma_pkt)
3721 {
3722         dev_dbg(hdev->dev, "DMA packet details:\n");
3723         dev_dbg(hdev->dev, "source == 0x%llx\n", user_dma_pkt->src_addr);
3724         dev_dbg(hdev->dev, "destination == 0x%llx\n", user_dma_pkt->dst_addr);
3725         dev_dbg(hdev->dev, "size == %u\n", user_dma_pkt->tsize);
3726
3727         /*
3728          * WA for HW-23.
3729          * We can't allow user to read from Host using QMANs other than 1.
3730          */
3731         if (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1 &&
3732                 hl_mem_area_inside_range(user_dma_pkt->src_addr,
3733                                 user_dma_pkt->tsize,
3734                                 hdev->asic_prop.va_space_host_start_address,
3735                                 hdev->asic_prop.va_space_host_end_address)) {
3736                 dev_err(hdev->dev,
3737                         "Can't DMA from host on queue other then 1\n");
3738                 return -EFAULT;
3739         }
3740
3741         if (user_dma_pkt->tsize == 0) {
3742                 dev_err(hdev->dev,
3743                         "Got DMA with size 0, might reset the device\n");
3744                 return -EINVAL;
3745         }
3746
3747         parser->patched_cb_size += sizeof(*user_dma_pkt);
3748
3749         return 0;
3750 }
3751
3752 static int goya_validate_wreg32(struct hl_device *hdev,
3753                                 struct hl_cs_parser *parser,
3754                                 struct packet_wreg32 *wreg_pkt)
3755 {
3756         struct goya_device *goya = hdev->asic_specific;
3757         u32 sob_start_addr, sob_end_addr;
3758         u16 reg_offset;
3759
3760         reg_offset = wreg_pkt->ctl & GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;
3761
3762         dev_dbg(hdev->dev, "WREG32 packet details:\n");
3763         dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
3764         dev_dbg(hdev->dev, "value      == 0x%x\n", wreg_pkt->value);
3765
3766         if (reg_offset != (mmDMA_CH_1_WR_COMP_ADDR_LO & 0xFFFF)) {
3767                 dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
3768                         reg_offset);
3769                 return -EPERM;
3770         }
3771
3772         /*
3773          * With MMU, DMA channels are not secured, so it doesn't matter where
3774          * the WR COMP will be written to because it will go out with
3775          * non-secured property
3776          */
3777         if (goya->hw_cap_initialized & HW_CAP_MMU)
3778                 return 0;
3779
3780         sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
3781         sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);
3782
3783         if ((wreg_pkt->value < sob_start_addr) ||
3784                         (wreg_pkt->value > sob_end_addr)) {
3785
3786                 dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
3787                         wreg_pkt->value);
3788                 return -EPERM;
3789         }
3790
3791         return 0;
3792 }
3793
3794 static int goya_validate_cb(struct hl_device *hdev,
3795                         struct hl_cs_parser *parser, bool is_mmu)
3796 {
3797         u32 cb_parsed_length = 0;
3798         int rc = 0;
3799
3800         parser->patched_cb_size = 0;
3801
3802         /* user_cb_size is more than 0 so the loop will always be executed */
3803         while (cb_parsed_length < parser->user_cb_size) {
3804                 enum packet_id pkt_id;
3805                 u16 pkt_size;
3806                 void *user_pkt;
3807
3808                 user_pkt = (void *) (uintptr_t)
3809                         (parser->user_cb->kernel_address + cb_parsed_length);
3810
3811                 pkt_id = (enum packet_id) (((*(u64 *) user_pkt) &
3812                                 PACKET_HEADER_PACKET_ID_MASK) >>
3813                                         PACKET_HEADER_PACKET_ID_SHIFT);
3814
3815                 pkt_size = goya_packet_sizes[pkt_id];
3816                 cb_parsed_length += pkt_size;
3817                 if (cb_parsed_length > parser->user_cb_size) {
3818                         dev_err(hdev->dev,
3819                                 "packet 0x%x is out of CB boundary\n", pkt_id);
3820                         rc = -EINVAL;
3821                         break;
3822                 }
3823
3824                 switch (pkt_id) {
3825                 case PACKET_WREG_32:
3826                         /*
3827                          * Although it is validated after copy in patch_cb(),
3828                          * need to validate here as well because patch_cb() is
3829                          * not called in MMU path while this function is called
3830                          */
3831                         rc = goya_validate_wreg32(hdev, parser, user_pkt);
3832                         break;
3833
3834                 case PACKET_WREG_BULK:
3835                         dev_err(hdev->dev,
3836                                 "User not allowed to use WREG_BULK\n");
3837                         rc = -EPERM;
3838                         break;
3839
3840                 case PACKET_MSG_PROT:
3841                         dev_err(hdev->dev,
3842                                 "User not allowed to use MSG_PROT\n");
3843                         rc = -EPERM;
3844                         break;
3845
3846                 case PACKET_CP_DMA:
3847                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3848                         rc = -EPERM;
3849                         break;
3850
3851                 case PACKET_STOP:
3852                         dev_err(hdev->dev, "User not allowed to use STOP\n");
3853                         rc = -EPERM;
3854                         break;
3855
3856                 case PACKET_LIN_DMA:
3857                         if (is_mmu)
3858                                 rc = goya_validate_dma_pkt_mmu(hdev, parser,
3859                                                 user_pkt);
3860                         else
3861                                 rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
3862                                                 user_pkt);
3863                         break;
3864
3865                 case PACKET_MSG_LONG:
3866                 case PACKET_MSG_SHORT:
3867                 case PACKET_FENCE:
3868                 case PACKET_NOP:
3869                         parser->patched_cb_size += pkt_size;
3870                         break;
3871
3872                 default:
3873                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3874                                 pkt_id);
3875                         rc = -EINVAL;
3876                         break;
3877                 }
3878
3879                 if (rc)
3880                         break;
3881         }
3882
3883         /*
3884          * The new CB should have space at the end for two MSG_PROT packets:
3885          * 1. A packet that will act as a completion packet
3886          * 2. A packet that will generate MSI-X interrupt
3887          */
3888         parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3889
3890         return rc;
3891 }
3892
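/*
 * goya_patch_dma_packet - rewrite a user LIN_DMA packet for submission
 *
 * @hdev: pointer to hl_device structure
 * @parser: pointer to the CS parser
 * @user_dma_pkt: the user's original LIN_DMA packet
 * @new_dma_pkt: where to write the patched packet(s)
 * @new_dma_pkt_size: returned total size of the patched packet(s)
 *
 * Device-only and host-memset transfers are copied unchanged. Transfers
 * that touch host memory are split into one packet per (merged) SG entry,
 * and the user's RDCOMP/WRCOMP bits are kept only on the last packet
 */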
3893 static int goya_patch_dma_packet(struct hl_device *hdev,
3894                                 struct hl_cs_parser *parser,
3895                                 struct packet_lin_dma *user_dma_pkt,
3896                                 struct packet_lin_dma *new_dma_pkt,
3897                                 u32 *new_dma_pkt_size)
3898 {
3899         struct hl_userptr *userptr;
3900         struct scatterlist *sg, *sg_next_iter;
3901         u32 count, dma_desc_cnt;
3902         u64 len, len_next;
3903         dma_addr_t dma_addr, dma_addr_next;
3904         enum goya_dma_direction user_dir;
3905         u64 device_memory_addr, addr;
3906         enum dma_data_direction dir;
3907         struct sg_table *sgt;
3908         bool skip_host_mem_pin = false;
3909         bool user_memset;
3910         u32 user_rdcomp_mask, user_wrcomp_mask;
3911
3912         user_dir = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3913                         GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3914
3915         user_memset = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3916                         GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3917
3918         if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM) ||
3919                         (user_dma_pkt->tsize == 0)) {
3920                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
3921                 *new_dma_pkt_size = sizeof(*new_dma_pkt);
3922                 return 0;
3923         }
3924
3925         if ((user_dir == DMA_HOST_TO_DRAM) || (user_dir == DMA_HOST_TO_SRAM)) {
3926                 addr = user_dma_pkt->src_addr;
3927                 device_memory_addr = user_dma_pkt->dst_addr;
3928                 dir = DMA_TO_DEVICE;
3929                 if (user_memset)
3930                         skip_host_mem_pin = true;
3931         } else {
3932                 addr = user_dma_pkt->dst_addr;
3933                 device_memory_addr = user_dma_pkt->src_addr;
3934                 dir = DMA_FROM_DEVICE;
3935         }
3936
3937         if (!skip_host_mem_pin &&
3938                 !hl_userptr_is_pinned(hdev, addr, user_dma_pkt->tsize,
3939                         parser->job_userptr_list, &userptr)) {
3940                 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3941                                 addr, user_dma_pkt->tsize);
3942                 return -EFAULT;
3943         }
3944
3945         if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3946                 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3947                 *new_dma_pkt_size = sizeof(*user_dma_pkt);
3948                 return 0;
3949         }
3950
3951         user_rdcomp_mask =
3952                         (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK);
3953
3954         user_wrcomp_mask =
3955                         (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
3956
3957         sgt = userptr->sgt;
3958         dma_desc_cnt = 0;
3959
3960         for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3961                 len = sg_dma_len(sg);
3962                 dma_addr = sg_dma_address(sg);
3963
3964                 if (len == 0)
3965                         break;
3966
3967                 while ((count + 1) < sgt->nents) {
3968                         sg_next_iter = sg_next(sg);
3969                         len_next = sg_dma_len(sg_next_iter);
3970                         dma_addr_next = sg_dma_address(sg_next_iter);
3971
3972                         if (len_next == 0)
3973                                 break;
3974
3975                         if ((dma_addr + len == dma_addr_next) &&
3976                                 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3977                                 len += len_next;
3978                                 count++;
3979                                 sg = sg_next_iter;
3980                         } else {
3981                                 break;
3982                         }
3983                 }
3984
3985                 new_dma_pkt->ctl = user_dma_pkt->ctl;
3986                 if (likely(dma_desc_cnt))
3987                         new_dma_pkt->ctl &= ~GOYA_PKT_CTL_EB_MASK;
3988                 new_dma_pkt->ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
3989                                         GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
3990                 new_dma_pkt->tsize = len;
3991
3992                 dma_addr += hdev->asic_prop.host_phys_base_address;
3993
3994                 if (dir == DMA_TO_DEVICE) {
3995                         new_dma_pkt->src_addr = dma_addr;
3996                         new_dma_pkt->dst_addr = device_memory_addr;
3997                 } else {
3998                         new_dma_pkt->src_addr = device_memory_addr;
3999                         new_dma_pkt->dst_addr = dma_addr;
4000                 }
4001
4002                 if (!user_memset)
4003                         device_memory_addr += len;
4004                 dma_desc_cnt++;
4005                 new_dma_pkt++;
4006         }
4007
4008         if (!dma_desc_cnt) {
4009                 dev_err(hdev->dev,
4010                         "Error of 0 SG entries when patching DMA packet\n");
4011                 return -EFAULT;
4012         }
4013
4014         /* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
4015         new_dma_pkt--;
4016         new_dma_pkt->ctl |= (user_rdcomp_mask | user_wrcomp_mask);
4017
4018         *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
4019
4020         return 0;
4021 }
4022
4023 static int goya_patch_cb(struct hl_device *hdev,
4024                                 struct hl_cs_parser *parser)
4025 {
4026         u32 cb_parsed_length = 0;
4027         u32 cb_patched_cur_length = 0;
4028         int rc = 0;
4029
4030         /* user_cb_size is more than 0 so the loop will always be executed */
4031         while (cb_parsed_length < parser->user_cb_size) {
4032                 enum packet_id pkt_id;
4033                 u16 pkt_size;
4034                 u32 new_pkt_size = 0;
4035                 void *user_pkt, *kernel_pkt;
4036
4037                 user_pkt = (void *) (uintptr_t)
4038                         (parser->user_cb->kernel_address + cb_parsed_length);
4039                 kernel_pkt = (void *) (uintptr_t)
4040                         (parser->patched_cb->kernel_address +
4041                                         cb_patched_cur_length);
4042
4043                 pkt_id = (enum packet_id) (((*(u64 *) user_pkt) &
4044                                 PACKET_HEADER_PACKET_ID_MASK) >>
4045                                         PACKET_HEADER_PACKET_ID_SHIFT);
4046
4047                 pkt_size = goya_packet_sizes[pkt_id];
4048                 cb_parsed_length += pkt_size;
4049                 if (cb_parsed_length > parser->user_cb_size) {
4050                         dev_err(hdev->dev,
4051                                 "packet 0x%x is out of CB boundary\n", pkt_id);
4052                         rc = -EINVAL;
4053                         break;
4054                 }
4055
4056                 switch (pkt_id) {
4057                 case PACKET_LIN_DMA:
4058                         rc = goya_patch_dma_packet(hdev, parser, user_pkt,
4059                                                 kernel_pkt, &new_pkt_size);
4060                         cb_patched_cur_length += new_pkt_size;
4061                         break;
4062
4063                 case PACKET_WREG_32:
4064                         memcpy(kernel_pkt, user_pkt, pkt_size);
4065                         cb_patched_cur_length += pkt_size;
4066                         rc = goya_validate_wreg32(hdev, parser, kernel_pkt);
4067                         break;
4068
4069                 case PACKET_WREG_BULK:
4070                         dev_err(hdev->dev,
4071                                 "User not allowed to use WREG_BULK\n");
4072                         rc = -EPERM;
4073                         break;
4074
4075                 case PACKET_MSG_PROT:
4076                         dev_err(hdev->dev,
4077                                 "User not allowed to use MSG_PROT\n");
4078                         rc = -EPERM;
4079                         break;
4080
4081                 case PACKET_CP_DMA:
4082                         dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4083                         rc = -EPERM;
4084                         break;
4085
4086                 case PACKET_STOP:
4087                         dev_err(hdev->dev, "User not allowed to use STOP\n");
4088                         rc = -EPERM;
4089                         break;
4090
4091                 case PACKET_MSG_LONG:
4092                 case PACKET_MSG_SHORT:
4093                 case PACKET_FENCE:
4094                 case PACKET_NOP:
4095                         memcpy(kernel_pkt, user_pkt, pkt_size);
4096                         cb_patched_cur_length += pkt_size;
4097                         break;
4098
4099                 default:
4100                         dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4101                                 pkt_id);
4102                         rc = -EINVAL;
4103                         break;
4104                 }
4105
4106                 if (rc)
4107                         break;
4108         }
4109
4110         return rc;
4111 }
4112
4113 static int goya_parse_cb_mmu(struct hl_device *hdev,
4114                 struct hl_cs_parser *parser)
4115 {
4116         u64 patched_cb_handle;
4117         u32 patched_cb_size;
4118         struct hl_cb *user_cb;
4119         int rc;
4120
4121         /*
4122          * The new CB should have space at the end for two MSG_PROT pkt:
4123          * 1. A packet that will act as a completion packet
4124          * 2. A packet that will generate MSI-X interrupt
4125          */
4126         parser->patched_cb_size = parser->user_cb_size +
4127                         sizeof(struct packet_msg_prot) * 2;
4128
4129         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
4130                                 parser->patched_cb_size,
4131                                 &patched_cb_handle, HL_KERNEL_ASID_ID);
4132
4133         if (rc) {
4134                 dev_err(hdev->dev,
4135                         "Failed to allocate patched CB for DMA CS %d\n",
4136                         rc);
4137                 return rc;
4138         }
4139
4140         patched_cb_handle >>= PAGE_SHIFT;
4141         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4142                                 (u32) patched_cb_handle);
4143         /* hl_cb_get should never fail here so use kernel WARN */
4144         WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4145                         (u32) patched_cb_handle);
4146         if (!parser->patched_cb) {
4147                 rc = -EFAULT;
4148                 goto out;
4149         }
4150
4151         /*
4152          * The check that parser->user_cb_size <= parser->user_cb->size was done
4153          * in validate_queue_index().
4154          */
4155         memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
4156                 (void *) (uintptr_t) parser->user_cb->kernel_address,
4157                 parser->user_cb_size);
4158
4159         patched_cb_size = parser->patched_cb_size;
4160
4161         /* validate patched CB instead of user CB */
4162         user_cb = parser->user_cb;
4163         parser->user_cb = parser->patched_cb;
4164         rc = goya_validate_cb(hdev, parser, true);
4165         parser->user_cb = user_cb;
4166
4167         if (rc) {
4168                 hl_cb_put(parser->patched_cb);
4169                 goto out;
4170         }
4171
4172         if (patched_cb_size != parser->patched_cb_size) {
4173                 dev_err(hdev->dev, "user CB size mismatch\n");
4174                 hl_cb_put(parser->patched_cb);
4175                 rc = -EINVAL;
4176                 goto out;
4177         }
4178
4179 out:
4180         /*
4181          * Always call cb destroy here because we still have 1 reference
4182          * to it from calling cb_get earlier. After the job is completed,
4183          * cb_put will release it, but here we want to remove it from the
4184          * idr
4185          */
4186         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4187                                         patched_cb_handle << PAGE_SHIFT);
4188
4189         return rc;
4190 }
4191
4192 static int goya_parse_cb_no_mmu(struct hl_device *hdev,
4193                                 struct hl_cs_parser *parser)
4194 {
4195         u64 patched_cb_handle;
4196         int rc;
4197
4198         rc = goya_validate_cb(hdev, parser, false);
4199
4200         if (rc)
4201                 goto free_userptr;
4202
4203         rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
4204                                 parser->patched_cb_size,
4205                                 &patched_cb_handle, HL_KERNEL_ASID_ID);
4206         if (rc) {
4207                 dev_err(hdev->dev,
4208                         "Failed to allocate patched CB for DMA CS %d\n", rc);
4209                 goto free_userptr;
4210         }
4211
4212         patched_cb_handle >>= PAGE_SHIFT;
4213         parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4214                                 (u32) patched_cb_handle);
4215         /* hl_cb_get should never fail here so use kernel WARN */
4216         WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4217                         (u32) patched_cb_handle);
4218         if (!parser->patched_cb) {
4219                 rc = -EFAULT;
4220                 goto out;
4221         }
4222
4223         rc = goya_patch_cb(hdev, parser);
4224
4225         if (rc)
4226                 hl_cb_put(parser->patched_cb);
4227
4228 out:
4229         /*
4230          * Always call cb destroy here because we still have 1 reference
4231          * to it from calling cb_get earlier. After the job is completed,
4232          * cb_put will release it, but here we want to remove it from the
4233          * idr
4234          */
4235         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4236                                 patched_cb_handle << PAGE_SHIFT);
4237
4238 free_userptr:
4239         if (rc)
4240                 hl_userptr_delete_list(hdev, parser->job_userptr_list);
4241         return rc;
4242 }
4243
4244 static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
4245                                         struct hl_cs_parser *parser)
4246 {
4247         struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4248         struct goya_device *goya = hdev->asic_specific;
4249
4250         if (!(goya->hw_cap_initialized & HW_CAP_MMU)) {
4251                 /* For internal queue jobs, just check if cb address is valid */
4252                 if (hl_mem_area_inside_range(
4253                                 (u64) (uintptr_t) parser->user_cb,
4254                                 parser->user_cb_size,
4255                                 asic_prop->sram_user_base_address,
4256                                 asic_prop->sram_end_address))
4257                         return 0;
4258
4259                 if (hl_mem_area_inside_range(
4260                                 (u64) (uintptr_t) parser->user_cb,
4261                                 parser->user_cb_size,
4262                                 asic_prop->dram_user_base_address,
4263                                 asic_prop->dram_end_address))
4264                         return 0;
4265
4266                 dev_err(hdev->dev,
4267                         "Internal CB address 0x%llx + 0x%x is not in SRAM nor in DRAM\n",
4268                         (u64) (uintptr_t) parser->user_cb,
4269                         parser->user_cb_size);
4270
4271                 return -EFAULT;
4272         }
4273
4274         return 0;
4275 }
4276
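/*
 * goya_cs_parser - parse and validate a command submission's CB
 *
 * @hdev: pointer to hl_device structure
 * @parser: pointer to the CS parser
 *
 * Internal-queue jobs are only range-checked. External-queue jobs are
 * validated and, unless they go through the MMU with virtual addresses,
 * also patched into a new kernel-owned CB
 */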
4277 int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4278 {
4279         struct goya_device *goya = hdev->asic_specific;
4280
4281         if (!parser->ext_queue)
4282                 return goya_parse_cb_no_ext_queue(hdev, parser);
4283
4284         if ((goya->hw_cap_initialized & HW_CAP_MMU) && parser->use_virt_addr)
4285                 return goya_parse_cb_mmu(hdev, parser);
4286         else
4287                 return goya_parse_cb_no_mmu(hdev, parser);
4288 }
4289
4290 void goya_add_end_of_cb_packets(u64 kernel_address, u32 len, u64 cq_addr,
4291                                 u32 cq_val, u32 msix_vec)
4292 {
4293         struct packet_msg_prot *cq_pkt;
4294
4295         cq_pkt = (struct packet_msg_prot *) (uintptr_t)
4296                 (kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
4297
4298         cq_pkt->ctl = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
4299                         (1 << GOYA_PKT_CTL_EB_SHIFT) |
4300                         (1 << GOYA_PKT_CTL_MB_SHIFT);
4301         cq_pkt->value = cq_val;
4302         cq_pkt->addr = cq_addr;
4303
4304         cq_pkt++;
4305
4306         cq_pkt->ctl = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
4307                         (1 << GOYA_PKT_CTL_MB_SHIFT);
4308         cq_pkt->value = msix_vec & 0x7FF;
4309         cq_pkt->addr = CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF;
4310 }
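
/*
 * Editor's sketch: goya_add_end_of_cb_packets() overwrites the last two
 * packet slots of the CB, so a caller is assumed to size its CB with room
 * for the CQ-update and MSI-X doorbell MSG_PROT packets:
 */
static u32 example_cb_size_with_eob(u32 payload_size)
{
	/* payload + CQ update packet + MSI-X doorbell packet */
	return payload_size + 2 * sizeof(struct packet_msg_prot);
}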
4311
4312 static void goya_update_eq_ci(struct hl_device *hdev, u32 val)
4313 {
4314         WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, val);
4315 }
4316
4317 static void goya_restore_phase_topology(struct hl_device *hdev)
4318 {
4319         int i, num_of_sob_in_longs, num_of_mon_in_longs;
4320
4321         num_of_sob_in_longs =
4322                 ((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4);
4323
4324         num_of_mon_in_longs =
4325                 ((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4);
4326
4327         for (i = 0 ; i < num_of_sob_in_longs ; i += 4)
4328                 WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0);
4329
4330         for (i = 0 ; i < num_of_mon_in_longs ; i += 4)
4331                 WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0);
4332
4333         /* Flush all WREG to prevent race */
4334         i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
4335 }
4336
4337 /*
4338  * goya_debugfs_read32 - read a 32bit value from a given device address
4339  *
4340  * @hdev:       pointer to hl_device structure
4341  * @addr:       address in device
4342  * @val:        returned value
4343  *
4344  * In case of a DDR address that is not mapped into the default aperture
4345  * that the DDR bar exposes, the function will configure the iATU so that
4346  * the DDR bar is positioned at a base address that allows reading from
4347  * the required address. Configuring the iATU during normal operation can
4348  * lead to undefined behavior and therefore should be done with extreme care
4349  *
4350  */
4351 static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4352 {
4353         struct asic_fixed_properties *prop = &hdev->asic_prop;
4354         int rc = 0;
4355
4356         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4357                 *val = RREG32(addr - CFG_BASE);
4358
4359         } else if ((addr >= SRAM_BASE_ADDR) &&
4360                         (addr < SRAM_BASE_ADDR + SRAM_SIZE)) {
4361
4362                 *val = readl(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4363                                 (addr - SRAM_BASE_ADDR));
4364
4365         } else if ((addr >= DRAM_PHYS_BASE) &&
4366                         (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size)) {
4367
4368                 u64 bar_base_addr = DRAM_PHYS_BASE +
4369                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4370
4371                 rc = goya_set_ddr_bar_base(hdev, bar_base_addr);
4372                 if (!rc) {
4373                         *val = readl(hdev->pcie_bar[DDR_BAR_ID] +
4374                                                 (addr - bar_base_addr));
4375
4376                         rc = goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE +
4377                                 (MMU_PAGE_TABLES_ADDR &
4378                                         ~(prop->dram_pci_bar_size - 0x1ull)));
4379                 }
4380         } else {
4381                 rc = -EFAULT;
4382         }
4383
4384         return rc;
4385 }
4386
4387 /*
4388  * goya_debugfs_write32 - write a 32bit value to a given device address
4389  *
4390  * @hdev:       pointer to hl_device structure
4391  * @addr:       address in device
4392  * @val:        value to write
4393  *
4394  * In case of a DDR address that is not mapped into the default aperture
4395  * that the DDR bar exposes, the function will configure the iATU so that
4396  * the DDR bar is positioned at a base address that allows writing to
4397  * the required address. Configuring the iATU during normal operation can
4398  * lead to undefined behavior and therefore should be done with extreme care
4399  *
4400  */
4401 static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4402 {
4403         struct asic_fixed_properties *prop = &hdev->asic_prop;
4404         int rc = 0;
4405
4406         if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4407                 WREG32(addr - CFG_BASE, val);
4408
4409         } else if ((addr >= SRAM_BASE_ADDR) &&
4410                         (addr < SRAM_BASE_ADDR + SRAM_SIZE)) {
4411
4412                 writel(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4413                                         (addr - SRAM_BASE_ADDR));
4414
4415         } else if ((addr >= DRAM_PHYS_BASE) &&
4416                         (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size)) {
4417
4418                 u64 bar_base_addr = DRAM_PHYS_BASE +
4419                                 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4420
4421                 rc = goya_set_ddr_bar_base(hdev, bar_base_addr);
4422                 if (!rc) {
4423                         writel(val, hdev->pcie_bar[DDR_BAR_ID] +
4424                                                 (addr - bar_base_addr));
4425
4426                         rc = goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE +
4427                                 (MMU_PAGE_TABLES_ADDR &
4428                                         ~(prop->dram_pci_bar_size - 0x1ull)));
4429                 }
4430         } else {
4431                 rc = -EFAULT;
4432         }
4433
4434         return rc;
4435 }
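
/*
 * Editor's sketch of the DDR bar windowing arithmetic shared by
 * goya_debugfs_read32() and goya_debugfs_write32(): with a power-of-2 bar
 * size, the bar is repositioned to the window containing addr and the
 * access is made at the remaining offset.
 */
static u64 example_ddr_bar_offset(u64 addr, u64 bar_size, u64 *bar_base)
{
	*bar_base = DRAM_PHYS_BASE + (addr & ~(bar_size - 0x1ull));
	return addr - *bar_base;
}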
4436
4437 static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
4438 {
4439         struct goya_device *goya = hdev->asic_specific;
4440
4441         return readq(hdev->pcie_bar[DDR_BAR_ID] +
4442                         (addr - goya->ddr_bar_cur_addr));
4443 }
4444
4445 static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4446 {
4447         struct goya_device *goya = hdev->asic_specific;
4448
4449         writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
4450                         (addr - goya->ddr_bar_cur_addr));
4451 }
4452
4453 static const char *_goya_get_event_desc(u16 event_type)
4454 {
4455         switch (event_type) {
4456         case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4457                 return "PCIe_dec";
4458         case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4459         case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4460         case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4461         case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4462         case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4463         case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4464         case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4465         case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4466                 return "TPC%d_dec";
4467         case GOYA_ASYNC_EVENT_ID_MME_WACS:
4468                 return "MME_wacs";
4469         case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4470                 return "MME_wacsd";
4471         case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4472                 return "CPU_axi_splitter";
4473         case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4474                 return "PSOC_axi_dec";
4475         case GOYA_ASYNC_EVENT_ID_PSOC:
4476                 return "PSOC";
4477         case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4478         case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4479         case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4480         case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4481         case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4482         case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4483         case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4484         case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4485                 return "TPC%d_krn_err";
4486         case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
4487                 return "TPC%d_cq";
4488         case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4489                 return "TPC%d_qm";
4490         case GOYA_ASYNC_EVENT_ID_MME_QM:
4491                 return "MME_qm";
4492         case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4493                 return "MME_cq";
4494         case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4495                 return "DMA%d_qm";
4496         case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4497                 return "DMA%d_ch";
4498         default:
4499                 return "N/A";
4500         }
4501 }
4502
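/*
 * Editor's note (assumption): the divisors in goya_get_event_desc() below
 * reflect the spacing of the GOYA_ASYNC_EVENT_ID_* values - TPC*_DEC IDs
 * are presumed to be 3 apart and TPC*_KRN_ERR IDs 10 apart, so dividing
 * the offset from the TPC0 entry recovers the TPC index substituted into
 * the "%d" strings returned by _goya_get_event_desc().
 */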
4503 static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
4504 {
4505         u8 index;
4506
4507         switch (event_type) {
4508         case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4509         case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4510         case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4511         case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4512         case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4513         case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4514         case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4515         case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4516                 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_DEC) / 3;
4517                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4518                 break;
4519         case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4520         case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4521         case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4522         case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4523         case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4524         case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4525         case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4526         case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4527                 index = (event_type - GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR) / 10;
4528                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4529                 break;
4530         case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_CMDQ:
4531                 index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_CMDQ;
4532                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4533                 break;
4534         case GOYA_ASYNC_EVENT_ID_TPC0_QM ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4535                 index = event_type - GOYA_ASYNC_EVENT_ID_TPC0_QM;
4536                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4537                 break;
4538         case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4539                 index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM;
4540                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4541                 break;
4542         case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4543                 index = event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH;
4544                 snprintf(desc, size, _goya_get_event_desc(event_type), index);
4545                 break;
4546         default:
4547                 snprintf(desc, size, _goya_get_event_desc(event_type));
4548                 break;
4549         }
4550 }
4551
4552 static void goya_print_razwi_info(struct hl_device *hdev)
4553 {
4554         if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
4555                 dev_err(hdev->dev, "Illegal write to LBW\n");
4556                 WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
4557         }
4558
4559         if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
4560                 dev_err(hdev->dev, "Illegal read from LBW\n");
4561                 WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
4562         }
4563
4564         if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
4565                 dev_err(hdev->dev, "Illegal write to HBW\n");
4566                 WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
4567         }
4568
4569         if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
4570                 dev_err(hdev->dev, "Illegal read from HBW\n");
4571                 WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
4572         }
4573 }
4574
4575 static void goya_print_mmu_error_info(struct hl_device *hdev)
4576 {
4577         struct goya_device *goya = hdev->asic_specific;
4578         u64 addr;
4579         u32 val;
4580
4581         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4582                 return;
4583
4584         val = RREG32(mmMMU_PAGE_ERROR_CAPTURE);
4585         if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
4586                 addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
4587                 addr <<= 32;
4588                 addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);
4589
4590                 dev_err(hdev->dev, "MMU page fault on va 0x%llx\n", addr);
4591
4592                 WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
4593         }
4594 }
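
/*
 * Editor's note: the fault address above is reassembled from two registers;
 * bits [49:32] of the VA come from the masked capture register and bits
 * [31:0] from mmMMU_PAGE_ERROR_CAPTURE_VA.
 */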
4595
4596 static void goya_print_irq_info(struct hl_device *hdev, u16 event_type)
4597 {
4598         char desc[20] = "";
4599
4600         goya_get_event_desc(event_type, desc, sizeof(desc));
4601         dev_err(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
4602                 event_type, desc);
4603
4604         goya_print_razwi_info(hdev);
4605         goya_print_mmu_error_info(hdev);
4606 }
4607
4608 static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
4609                 size_t irq_arr_size)
4610 {
4611         struct armcp_unmask_irq_arr_packet *pkt;
4612         size_t total_pkt_size;
4613         long result;
4614         int rc;
4615
4616         total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
4617                         irq_arr_size;
4618
4619         /* data should be aligned to 8 bytes in order for ArmCP to copy it */
4620         total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
4621
4622         /* total_pkt_size is cast to u16 later on */
4623         if (total_pkt_size > USHRT_MAX) {
4624                 dev_err(hdev->dev, "too many elements in IRQ array\n");
4625                 return -EINVAL;
4626         }
4627
4628         pkt = kzalloc(total_pkt_size, GFP_KERNEL);
4629         if (!pkt)
4630                 return -ENOMEM;
4631
4632         pkt->length = irq_arr_size / sizeof(irq_arr[0]);
4633         memcpy(&pkt->irqs, irq_arr, irq_arr_size);
4634
4635         pkt->armcp_pkt.ctl = ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
4636                                                 ARMCP_PKT_CTL_OPCODE_SHIFT;
4637
4638         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
4639                         total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result);
4640
4641         if (rc)
4642                 dev_err(hdev->dev, "failed to unmask IRQ array\n");
4643
4644         kfree(pkt);
4645
4646         return rc;
4647 }
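
/*
 * Editor's note: the round-up of total_pkt_size above is the usual
 * power-of-2 alignment idiom; with the kernel's ALIGN() macro it could be
 * written as:
 *
 *	total_pkt_size = ALIGN(total_pkt_size, 8);
 */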
4648
4649 static int goya_soft_reset_late_init(struct hl_device *hdev)
4650 {
4651         /*
4652          * Unmask all IRQs since some could have been received
4653          * during the soft reset
4654          */
4655         return goya_unmask_irq_arr(hdev, goya_non_fatal_events,
4656                         sizeof(goya_non_fatal_events));
4657 }
4658
4659 static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
4660 {
4661         struct armcp_packet pkt;
4662         long result;
4663         int rc;
4664
4665         memset(&pkt, 0, sizeof(pkt));
4666
4667         pkt.ctl = ARMCP_PACKET_UNMASK_RAZWI_IRQ << ARMCP_PKT_CTL_OPCODE_SHIFT;
4668         pkt.value = event_type;
4669
4670         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
4671                         HL_DEVICE_TIMEOUT_USEC, &result);
4672
4673         if (rc)
4674                 dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d\n", event_type);
4675
4676         return rc;
4677 }
4678
4679 void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
4680 {
4681         u16 event_type = ((eq_entry->hdr.ctl & EQ_CTL_EVENT_TYPE_MASK)
4682                         >> EQ_CTL_EVENT_TYPE_SHIFT);
4683         struct goya_device *goya = hdev->asic_specific;
4684
4685         goya->events_stat[event_type]++;
4686
4687         switch (event_type) {
4688         case GOYA_ASYNC_EVENT_ID_PCIE_IF:
4689         case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4690         case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4691         case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4692         case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4693         case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4694         case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4695         case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4696         case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4697         case GOYA_ASYNC_EVENT_ID_MME_ECC:
4698         case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
4699         case GOYA_ASYNC_EVENT_ID_MMU_ECC:
4700         case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
4701         case GOYA_ASYNC_EVENT_ID_DMA_ECC:
4702         case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
4703         case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
4704         case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
4705         case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4706         case GOYA_ASYNC_EVENT_ID_GIC500:
4707         case GOYA_ASYNC_EVENT_ID_PLL0:
4708         case GOYA_ASYNC_EVENT_ID_PLL1:
4709         case GOYA_ASYNC_EVENT_ID_PLL3:
4710         case GOYA_ASYNC_EVENT_ID_PLL4:
4711         case GOYA_ASYNC_EVENT_ID_PLL5:
4712         case GOYA_ASYNC_EVENT_ID_PLL6:
4713         case GOYA_ASYNC_EVENT_ID_AXI_ECC:
4714         case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
4715         case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
4716         case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
4717                 dev_err(hdev->dev,
4718                         "Received H/W interrupt %d, reset the chip\n",
4719                         event_type);
4720                 hl_device_reset(hdev, true, false);
4721                 break;
4722
4723         case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4724         case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4725         case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4726         case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4727         case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4728         case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4729         case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4730         case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4731         case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4732         case GOYA_ASYNC_EVENT_ID_MME_WACS:
4733         case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4734         case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4735         case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4736         case GOYA_ASYNC_EVENT_ID_PSOC:
4737         case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4738         case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4739         case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4740         case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4741         case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4742         case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4743         case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4744         case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4745         case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4746         case GOYA_ASYNC_EVENT_ID_MME_QM:
4747         case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4748         case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4749         case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4750                 goya_print_irq_info(hdev, event_type);
4751                 goya_unmask_irq(hdev, event_type);
4752                 break;
4753
4754         case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4755         case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4756         case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4757         case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4758         case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4759         case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4760         case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4761         case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4762         case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0:
4763         case GOYA_ASYNC_EVENT_ID_DMA_BM_CH1:
4764         case GOYA_ASYNC_EVENT_ID_DMA_BM_CH2:
4765         case GOYA_ASYNC_EVENT_ID_DMA_BM_CH3:
4766         case GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4767                 dev_info(hdev->dev, "Received H/W interrupt %d\n", event_type);
4768                 break;
4769
4770         default:
4771                 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
4772                                 event_type);
4773                 break;
4774         }
4775 }
4776
4777 void *goya_get_events_stat(struct hl_device *hdev, u32 *size)
4778 {
4779         struct goya_device *goya = hdev->asic_specific;
4780
4781         *size = (u32) sizeof(goya->events_stat);
4782
4783         return goya->events_stat;
4784 }
4785
4786 static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u32 size,
4787                                 u64 val, bool is_dram)
4788 {
4789         struct packet_lin_dma *lin_dma_pkt;
4790         struct hl_cs_parser parser;
4791         struct hl_cs_job *job;
4792         u32 cb_size;
4793         struct hl_cb *cb;
4794         int rc;
4795
4796         cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
4797         if (!cb)
4798                 return -EFAULT;
4799
4800         lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;
4801
4802         memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4803         cb_size = sizeof(*lin_dma_pkt);
4804
4805         lin_dma_pkt->ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
4806                                 (1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
4807                                 (1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
4808                                 (1 << GOYA_PKT_CTL_RB_SHIFT) |
4809                                 (1 << GOYA_PKT_CTL_MB_SHIFT));
4810
4811         lin_dma_pkt->ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
4812                                 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
4813
4814         lin_dma_pkt->src_addr = val;
4815         lin_dma_pkt->dst_addr = addr;
4816         lin_dma_pkt->tsize = size;
4817
4818         job = hl_cs_allocate_job(hdev, true);
4819         if (!job) {
4820                 dev_err(hdev->dev, "Failed to allocate a new job\n");
4821                 rc = -ENOMEM;
4822                 goto release_cb;
4823         }
4824
4825         job->id = 0;
4826         job->user_cb = cb;
4827         job->user_cb->cs_cnt++;
4828         job->user_cb_size = cb_size;
4829         job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
4830
4831         hl_debugfs_add_job(hdev, job);
4832
4833         parser.ctx_id = HL_KERNEL_ASID_ID;
4834         parser.cs_sequence = 0;
4835         parser.job_id = job->id;
4836         parser.hw_queue_id = job->hw_queue_id;
4837         parser.job_userptr_list = &job->userptr_list;
4838         parser.user_cb = job->user_cb;
4839         parser.user_cb_size = job->user_cb_size;
4840         parser.ext_queue = job->ext_queue;
4841         parser.use_virt_addr = hdev->mmu_enable;
4842
4843         rc = hdev->asic_funcs->cs_parser(hdev, &parser);
4844         if (rc) {
4845                 dev_err(hdev->dev, "Failed to parse kernel CB\n");
4846                 goto free_job;
4847         }
4848
4849         job->patched_cb = parser.patched_cb;
4850         job->job_cb_size = parser.patched_cb_size;
4851         job->patched_cb->cs_cnt++;
4852
4853         rc = goya_send_job_on_qman0(hdev, job);
4854
4855         job->patched_cb->cs_cnt--;
4856         hl_cb_put(job->patched_cb);
4857
4858 free_job:
4859         hl_userptr_delete_list(hdev, &job->userptr_list);
4860         hl_debugfs_remove_job(hdev, job);
4861         kfree(job);
4862         cb->cs_cnt--;
4863
4864 release_cb:
4865         hl_cb_put(cb);
4866         hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4867
4868         return rc;
4869 }
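
/*
 * Editor's note (assumption): with the MEMSET bit set in the LIN_DMA ctl
 * word above, the DMA engine is presumed to interpret src_addr as a 64-bit
 * fill pattern rather than a source address, replicating val across size
 * bytes at addr; the WO bit marks the transfer as write-only.
 */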
4870
4871 static int goya_context_switch(struct hl_device *hdev, u32 asid)
4872 {
4873         struct asic_fixed_properties *prop = &hdev->asic_prop;
4874         u64 addr = prop->sram_base_address;
4875         u32 size = hdev->pldm ? 0x10000 : prop->sram_size;
4876         u64 val = 0x7777777777777777ull;
4877         int rc;
4878
4879         rc = goya_memset_device_memory(hdev, addr, size, val, false);
4880         if (rc) {
4881                 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4882                 return rc;
4883         }
4884
4885         goya_mmu_prepare(hdev, asid);
4886
4887         return 0;
4888 }
4889
4890 static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
4891 {
4892         struct asic_fixed_properties *prop = &hdev->asic_prop;
4893         struct goya_device *goya = hdev->asic_specific;
4894         u64 addr = prop->mmu_pgt_addr;
4895         u32 size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
4896                         MMU_CACHE_MNG_SIZE;
4897
4898         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4899                 return 0;
4900
4901         return goya_memset_device_memory(hdev, addr, size, 0, true);
4902 }
4903
4904 static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
4905 {
4906         struct goya_device *goya = hdev->asic_specific;
4907         u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
4908         u32 size = MMU_DRAM_DEFAULT_PAGE_SIZE;
4909         u64 val = 0x9999999999999999ull;
4910
4911         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4912                 return 0;
4913
4914         return goya_memset_device_memory(hdev, addr, size, val, true);
4915 }
4916
4917 static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
4918 {
4919         struct goya_device *goya = hdev->asic_specific;
4920         int i;
4921
4922         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4923                 return;
4924
4925         if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
4926                 WARN(1, "asid %u is too big\n", asid);
4927                 return;
4928         }
4929
4930         /* zero the MMBP and ASID bits and then set the ASID */
4931         for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++) {
4932                 WREG32_AND(goya_mmu_regs[i], ~0x7FF);
4933                 WREG32_OR(goya_mmu_regs[i], asid);
4934         }
4935 }
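
/*
 * Editor's note (assumption): the 0x7FF mask above is presumed to cover the
 * ASID field plus the MMU-bypass (MMBP) bit of each QMAN secure-props
 * register, so the read-modify-write clears bypass and then programs the
 * new ASID for all GOYA_MMU_REGS_NUM registers.
 */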
4936
4937 static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard)
4938 {
4939         struct goya_device *goya = hdev->asic_specific;
4940         u32 status, timeout_usec;
4941         int rc;
4942
4943         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4944                 return;
4945
4946         /* no need for an L1-only invalidation in Goya */
4947         if (!is_hard)
4948                 return;
4949
4950         if (hdev->pldm)
4951                 timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
4952         else
4953                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
4954
4955         mutex_lock(&hdev->mmu_cache_lock);
4956
4957         /* L0 & L1 invalidation */
4958         WREG32(mmSTLB_INV_ALL_START, 1);
4959
4960         rc = hl_poll_timeout(
4961                 hdev,
4962                 mmSTLB_INV_ALL_START,
4963                 status,
4964                 !status,
4965                 1000,
4966                 timeout_usec);
4967
4968         mutex_unlock(&hdev->mmu_cache_lock);
4969
4970         if (rc)
4971                 dev_notice_ratelimited(hdev->dev,
4972                         "Timeout when waiting for MMU cache invalidation\n");
4973 }
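
/*
 * Editor's note: hl_poll_timeout() is assumed to behave like the kernel's
 * readl_poll_timeout() - it re-reads the given register into status every
 * 1000 us until the condition holds or timeout_usec expires, returning
 * -ETIMEDOUT in the latter case, which both invalidation paths report with
 * a rate-limited notice.
 */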
4974
4975 static void goya_mmu_invalidate_cache_range(struct hl_device *hdev,
4976                 bool is_hard, u32 asid, u64 va, u64 size)
4977 {
4978         struct goya_device *goya = hdev->asic_specific;
4979         u32 status, timeout_usec, inv_data, pi;
4980         int rc;
4981
4982         if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4983                 return;
4984
4985         /* no need for an L1-only invalidation in Goya */
4986         if (!is_hard)
4987                 return;
4988
4989         if (hdev->pldm)
4990                 timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
4991         else
4992                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
4993
4994         mutex_lock(&hdev->mmu_cache_lock);
4995
4996         /*
4997          * TODO: currently invalidate entire L0 & L1 as in regular hard
4998          * invalidation. Need to apply invalidation of specific cache lines with
4999          * mask of ASID & VA & size.
5000          * Note that L1 will be flushed entirely in any case.
5001          */
5002
5003         /* L0 & L1 invalidation */
5004         inv_data = RREG32(mmSTLB_CACHE_INV);
5005         /* PI is 8 bit */
5006         pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
5007         WREG32(mmSTLB_CACHE_INV,
5008                         (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
5009
5010         rc = hl_poll_timeout(
5011                 hdev,
5012                 mmSTLB_INV_CONSUMER_INDEX,
5013                 status,
5014                 status == pi,
5015                 1000,
5016                 timeout_usec);
5017
5018         mutex_unlock(&hdev->mmu_cache_lock);
5019
5020         if (rc)
5021                 dev_notice_ratelimited(hdev->dev,
5022                         "Timeout when waiting for MMU cache invalidation\n");
5023 }
5024
5025 static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
5026                                                 u64 phys_addr)
5027 {
5028         u32 status, timeout_usec;
5029         int rc;
5030
5031         if (hdev->pldm)
5032                 timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
5033         else
5034                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5035
5036         WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
5037         WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
5038         WREG32(MMU_ASID_BUSY, 0x80000000 | asid);
5039
5040         rc = hl_poll_timeout(
5041                 hdev,
5042                 MMU_ASID_BUSY,
5043                 status,
5044                 !(status & 0x80000000),
5045                 1000,
5046                 timeout_usec);
5047
5048         if (rc) {
5049                 dev_err(hdev->dev,
5050                         "Timeout during MMU hop0 config of asid %d\n", asid);
5051                 return rc;
5052         }
5053
5054         return 0;
5055 }
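
/*
 * Editor's worked example (assuming the shifts match the register names,
 * i.e. MMU_HOP0_PA43_12_SHIFT == 12 and MMU_HOP0_PA49_44_SHIFT == 44):
 * for phys_addr = 0x123456789000, MMU_HOP0_PA43_12 is written 0x23456789
 * (bits [43:12]) and MMU_HOP0_PA49_44 is written 0x1 (bits [49:44]);
 * bit 31 of MMU_ASID_BUSY is the busy flag polled above.
 */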
5056
5057 int goya_send_heartbeat(struct hl_device *hdev)
5058 {
5059         struct goya_device *goya = hdev->asic_specific;
5060         struct armcp_packet hb_pkt;
5061         long result;
5062         int rc;
5063
5064         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5065                 return 0;
5066
5067         memset(&hb_pkt, 0, sizeof(hb_pkt));
5068
5069         hb_pkt.ctl = ARMCP_PACKET_TEST << ARMCP_PKT_CTL_OPCODE_SHIFT;
5070         hb_pkt.value = ARMCP_PACKET_FENCE_VAL;
5071
5072         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
5073                         sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
5074
5075         if ((rc) || (result != ARMCP_PACKET_FENCE_VAL))
5076                 rc = -EIO;
5077
5078         return rc;
5079 }
5080
5081 static int goya_armcp_info_get(struct hl_device *hdev)
5082 {
5083         struct goya_device *goya = hdev->asic_specific;
5084         struct asic_fixed_properties *prop = &hdev->asic_prop;
5085         struct armcp_packet pkt;
5086         void *armcp_info_cpu_addr;
5087         dma_addr_t armcp_info_dma_addr;
5088         u64 dram_size;
5089         long result;
5090         int rc;
5091
5092         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5093                 return 0;
5094
5095         armcp_info_cpu_addr =
5096                         hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
5097                         sizeof(struct armcp_info), &armcp_info_dma_addr);
5098         if (!armcp_info_cpu_addr) {
5099                 dev_err(hdev->dev,
5100                         "Failed to allocate DMA memory for ArmCP info packet\n");
5101                 return -ENOMEM;
5102         }
5103
5104         memset(armcp_info_cpu_addr, 0, sizeof(struct armcp_info));
5105
5106         memset(&pkt, 0, sizeof(pkt));
5107
5108         pkt.ctl = ARMCP_PACKET_INFO_GET << ARMCP_PKT_CTL_OPCODE_SHIFT;
5109         pkt.addr = armcp_info_dma_addr + prop->host_phys_base_address;
5110         pkt.data_max_size = sizeof(struct armcp_info);
5111
5112         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
5113                         GOYA_ARMCP_INFO_TIMEOUT, &result);
5114
5115         if (rc) {
5116                 dev_err(hdev->dev,
5117                         "Failed to send armcp info pkt, error %d\n", rc);
5118                 goto out;
5119         }
5120
5121         memcpy(&prop->armcp_info, armcp_info_cpu_addr,
5122                         sizeof(prop->armcp_info));
5123
5124         dram_size = prop->armcp_info.dram_size;
5125         if (dram_size) {
5126                 if ((!is_power_of_2(dram_size)) ||
5127                                 (dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
5128                         dev_err(hdev->dev,
5129                                 "F/W reported invalid DRAM size %llu. Trying to use default size\n",
5130                                 dram_size);
5131                         dram_size = DRAM_PHYS_DEFAULT_SIZE;
5132                 }
5133
5134                 prop->dram_size = dram_size;
5135                 prop->dram_end_address = prop->dram_base_address + dram_size;
5136         }
5137
5138         rc = hl_build_hwmon_channel_info(hdev, prop->armcp_info.sensors);
5139         if (rc) {
5140                 dev_err(hdev->dev,
5141                         "Failed to build hwmon channel info, error %d\n", rc);
5142                 rc = -EFAULT;
5143                 goto out;
5144         }
5145
5146 out:
5147         hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
5148                         sizeof(struct armcp_info), armcp_info_cpu_addr);
5149
5150         return rc;
5151 }
5152
5153 static void goya_init_clock_gating(struct hl_device *hdev)
5154 {
5155
5156 }
5157
5158 static void goya_disable_clock_gating(struct hl_device *hdev)
5159 {
5160
5161 }
5162
5163 static bool goya_is_device_idle(struct hl_device *hdev)
5164 {
5165         u64 offset, dma_qm_reg, tpc_qm_reg, tpc_cmdq_reg, tpc_cfg_reg;
5166         int i;
5167
5168         offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;
5169
5170         for (i = 0 ; i < DMA_MAX_NUM ; i++) {
5171                 dma_qm_reg = mmDMA_QM_0_GLBL_STS0 + i * offset;
5172
5173                 if ((RREG32(dma_qm_reg) & DMA_QM_IDLE_MASK) !=
5174                                 DMA_QM_IDLE_MASK)
5175                         return false;
5176         }
5177
5178         offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;
5179
5180         for (i = 0 ; i < TPC_MAX_NUM ; i++) {
5181                 tpc_qm_reg = mmTPC0_QM_GLBL_STS0 + i * offset;
5182                 tpc_cmdq_reg = mmTPC0_CMDQ_GLBL_STS0 + i * offset;
5183                 tpc_cfg_reg = mmTPC0_CFG_STATUS + i * offset;
5184
5185                 if ((RREG32(tpc_qm_reg) & TPC_QM_IDLE_MASK) !=
5186                                 TPC_QM_IDLE_MASK)
5187                         return false;
5188
5189                 if ((RREG32(tpc_cmdq_reg) & TPC_CMDQ_IDLE_MASK) !=
5190                                 TPC_CMDQ_IDLE_MASK)
5191                         return false;
5192
5193                 if ((RREG32(tpc_cfg_reg) & TPC_CFG_IDLE_MASK) !=
5194                                 TPC_CFG_IDLE_MASK)
5195                         return false;
5196         }
5197
5198         if ((RREG32(mmMME_QM_GLBL_STS0) & MME_QM_IDLE_MASK) !=
5199                         MME_QM_IDLE_MASK)
5200                 return false;
5201
5202         if ((RREG32(mmMME_CMDQ_GLBL_STS0) & MME_CMDQ_IDLE_MASK) !=
5203                         MME_CMDQ_IDLE_MASK)
5204                 return false;
5205
5206         if ((RREG32(mmMME_ARCH_STATUS) & MME_ARCH_IDLE_MASK) !=
5207                         MME_ARCH_IDLE_MASK)
5208                 return false;
5209
5210         if (RREG32(mmMME_SHADOW_0_STATUS) & MME_SHADOW_IDLE_MASK)
5211                 return false;
5212
5213         return true;
5214 }
5215
5216 static void goya_hw_queues_lock(struct hl_device *hdev)
5217 {
5218         struct goya_device *goya = hdev->asic_specific;
5219
5220         spin_lock(&goya->hw_queues_lock);
5221 }
5222
5223 static void goya_hw_queues_unlock(struct hl_device *hdev)
5224 {
5225         struct goya_device *goya = hdev->asic_specific;
5226
5227         spin_unlock(&goya->hw_queues_lock);
5228 }
5229
5230 static u32 goya_get_pci_id(struct hl_device *hdev)
5231 {
5232         return hdev->pdev->device;
5233 }
5234
5235 static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
5236                                 size_t max_size)
5237 {
5238         struct goya_device *goya = hdev->asic_specific;
5239         struct asic_fixed_properties *prop = &hdev->asic_prop;
5240         struct armcp_packet pkt;
5241         void *eeprom_info_cpu_addr;
5242         dma_addr_t eeprom_info_dma_addr;
5243         long result;
5244         int rc;
5245
5246         if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5247                 return 0;
5248
5249         eeprom_info_cpu_addr =
5250                         hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
5251                                         max_size, &eeprom_info_dma_addr);
5252         if (!eeprom_info_cpu_addr) {
5253                 dev_err(hdev->dev,
5254                         "Failed to allocate DMA memory for EEPROM info packet\n");
5255                 return -ENOMEM;
5256         }
5257
5258         memset(eeprom_info_cpu_addr, 0, max_size);
5259
5260         memset(&pkt, 0, sizeof(pkt));
5261
5262         pkt.ctl = ARMCP_PACKET_EEPROM_DATA_GET << ARMCP_PKT_CTL_OPCODE_SHIFT;
5263         pkt.addr = eeprom_info_dma_addr + prop->host_phys_base_address;
5264         pkt.data_max_size = max_size;
5265
5266         rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
5267                         GOYA_ARMCP_EEPROM_TIMEOUT, &result);
5268
5269         if (rc) {
5270                 dev_err(hdev->dev,
5271                         "Failed to send armcp EEPROM pkt, error %d\n", rc);
5272                 goto out;
5273         }
5274
5275         /* result contains the actual size */
5276         memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size));
5277
5278 out:
5279         hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size,
5280                         eeprom_info_cpu_addr);
5281
5282         return rc;
5283 }
5284
5285 static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
5286 {
5287         return RREG32(mmPSOC_GLOBAL_CONF_APP_STATUS);
5288 }
5289
5290 static const struct hl_asic_funcs goya_funcs = {
5291         .early_init = goya_early_init,
5292         .early_fini = goya_early_fini,
5293         .late_init = goya_late_init,
5294         .late_fini = goya_late_fini,
5295         .sw_init = goya_sw_init,
5296         .sw_fini = goya_sw_fini,
5297         .hw_init = goya_hw_init,
5298         .hw_fini = goya_hw_fini,
5299         .halt_engines = goya_halt_engines,
5300         .suspend = goya_suspend,
5301         .resume = goya_resume,
5302         .cb_mmap = goya_cb_mmap,
5303         .ring_doorbell = goya_ring_doorbell,
5304         .flush_pq_write = goya_flush_pq_write,
5305         .dma_alloc_coherent = goya_dma_alloc_coherent,
5306         .dma_free_coherent = goya_dma_free_coherent,
5307         .get_int_queue_base = goya_get_int_queue_base,
5308         .test_queues = goya_test_queues,
5309         .dma_pool_zalloc = goya_dma_pool_zalloc,
5310         .dma_pool_free = goya_dma_pool_free,
5311         .cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
5312         .cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
5313         .hl_dma_unmap_sg = goya_dma_unmap_sg,
5314         .cs_parser = goya_cs_parser,
5315         .asic_dma_map_sg = goya_dma_map_sg,
5316         .get_dma_desc_list_size = goya_get_dma_desc_list_size,
5317         .add_end_of_cb_packets = goya_add_end_of_cb_packets,
5318         .update_eq_ci = goya_update_eq_ci,
5319         .context_switch = goya_context_switch,
5320         .restore_phase_topology = goya_restore_phase_topology,
5321         .debugfs_read32 = goya_debugfs_read32,
5322         .debugfs_write32 = goya_debugfs_write32,
5323         .add_device_attr = goya_add_device_attr,
5324         .handle_eqe = goya_handle_eqe,
5325         .set_pll_profile = goya_set_pll_profile,
5326         .get_events_stat = goya_get_events_stat,
5327         .read_pte = goya_read_pte,
5328         .write_pte = goya_write_pte,
5329         .mmu_invalidate_cache = goya_mmu_invalidate_cache,
5330         .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
5331         .send_heartbeat = goya_send_heartbeat,
5332         .enable_clock_gating = goya_init_clock_gating,
5333         .disable_clock_gating = goya_disable_clock_gating,
5334         .is_device_idle = goya_is_device_idle,
5335         .soft_reset_late_init = goya_soft_reset_late_init,
5336         .hw_queues_lock = goya_hw_queues_lock,
5337         .hw_queues_unlock = goya_hw_queues_unlock,
5338         .get_pci_id = goya_get_pci_id,
5339         .get_eeprom_data = goya_get_eeprom_data,
5340         .send_cpu_message = goya_send_cpu_message,
5341         .get_hw_state = goya_get_hw_state
5342 };
5343
5344 /*
5345  * goya_set_asic_funcs - set Goya function pointers
5346  *
5347  * @hdev: pointer to hl_device structure
5348  *
5349  */
5350 void goya_set_asic_funcs(struct hl_device *hdev)
5351 {
5352         hdev->asic_funcs = &goya_funcs;
5353 }