drm/amdgpu: remove redundant logic related to HDP
[linux-2.6-microblaze.git] drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/firmware.h>
25 #include <linux/pci.h>
26
27 #include <drm/drm_cache.h>
28
29 #include "amdgpu.h"
30 #include "gmc_v9_0.h"
31 #include "amdgpu_atomfirmware.h"
32 #include "amdgpu_gem.h"
33
34 #include "gc/gc_9_0_sh_mask.h"
35 #include "dce/dce_12_0_offset.h"
36 #include "dce/dce_12_0_sh_mask.h"
37 #include "vega10_enum.h"
38 #include "mmhub/mmhub_1_0_offset.h"
39 #include "athub/athub_1_0_sh_mask.h"
40 #include "athub/athub_1_0_offset.h"
41 #include "oss/osssys_4_0_offset.h"
42
43 #include "soc15.h"
44 #include "soc15d.h"
45 #include "soc15_common.h"
46 #include "umc/umc_6_0_sh_mask.h"
47
48 #include "gfxhub_v1_0.h"
49 #include "mmhub_v1_0.h"
50 #include "athub_v1_0.h"
51 #include "gfxhub_v1_1.h"
52 #include "mmhub_v9_4.h"
53 #include "umc_v6_1.h"
54 #include "umc_v6_0.h"
55
56 #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
57
58 #include "amdgpu_ras.h"
59 #include "amdgpu_xgmi.h"
60
61 /* add these here since we already include dce12 headers and these are for DCN */
62 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION                                                          0x055d
63 #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX                                                 2
64 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH__SHIFT                                        0x0
65 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT__SHIFT                                       0x10
66 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_WIDTH_MASK                                          0x00003FFFL
67 #define HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION__PRI_VIEWPORT_HEIGHT_MASK                                         0x3FFF0000L
68 #define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0                                                                  0x049d
69 #define mmDCHUBBUB_SDPIF_MMIO_CNTRL_0_BASE_IDX                                                         2
70
71
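/* Client ID decode tables: these translate the CID reported in
 * VM_L2_PROTECTION_FAULT_STATUS into a human-readable block name for the
 * fault messages printed by gmc_v9_0_process_interrupt(). The mmhub tables
 * are indexed as [cid][rw], so reads and writes may decode to different clients.
 */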
72 static const char *gfxhub_client_ids[] = {
73         "CB",
74         "DB",
75         "IA",
76         "WD",
77         "CPF",
78         "CPC",
79         "CPG",
80         "RLC",
81         "TCP",
82         "SQC (inst)",
83         "SQC (data)",
84         "SQG",
85         "PA",
86 };
87
88 static const char *mmhub_client_ids_raven[][2] = {
89         [0][0] = "MP1",
90         [1][0] = "MP0",
91         [2][0] = "VCN",
92         [3][0] = "VCNU",
93         [4][0] = "HDP",
94         [5][0] = "DCE",
95         [13][0] = "UTCL2",
96         [19][0] = "TLS",
97         [26][0] = "OSS",
98         [27][0] = "SDMA0",
99         [0][1] = "MP1",
100         [1][1] = "MP0",
101         [2][1] = "VCN",
102         [3][1] = "VCNU",
103         [4][1] = "HDP",
104         [5][1] = "XDP",
105         [6][1] = "DBGU0",
106         [7][1] = "DCE",
107         [8][1] = "DCEDWB0",
108         [9][1] = "DCEDWB1",
109         [26][1] = "OSS",
110         [27][1] = "SDMA0",
111 };
112
113 static const char *mmhub_client_ids_renoir[][2] = {
114         [0][0] = "MP1",
115         [1][0] = "MP0",
116         [2][0] = "HDP",
117         [4][0] = "DCEDMC",
118         [5][0] = "DCEVGA",
119         [13][0] = "UTCL2",
120         [19][0] = "TLS",
121         [26][0] = "OSS",
122         [27][0] = "SDMA0",
123         [28][0] = "VCN",
124         [29][0] = "VCNU",
125         [30][0] = "JPEG",
126         [0][1] = "MP1",
127         [1][1] = "MP0",
128         [2][1] = "HDP",
129         [3][1] = "XDP",
130         [6][1] = "DBGU0",
131         [7][1] = "DCEDMC",
132         [8][1] = "DCEVGA",
133         [9][1] = "DCEDWB",
134         [26][1] = "OSS",
135         [27][1] = "SDMA0",
136         [28][1] = "VCN",
137         [29][1] = "VCNU",
138         [30][1] = "JPEG",
139 };
140
141 static const char *mmhub_client_ids_vega10[][2] = {
142         [0][0] = "MP0",
143         [1][0] = "UVD",
144         [2][0] = "UVDU",
145         [3][0] = "HDP",
146         [13][0] = "UTCL2",
147         [14][0] = "OSS",
148         [15][0] = "SDMA1",
149         [32+0][0] = "VCE0",
150         [32+1][0] = "VCE0U",
151         [32+2][0] = "XDMA",
152         [32+3][0] = "DCE",
153         [32+4][0] = "MP1",
154         [32+14][0] = "SDMA0",
155         [0][1] = "MP0",
156         [1][1] = "UVD",
157         [2][1] = "UVDU",
158         [3][1] = "DBGU0",
159         [4][1] = "HDP",
160         [5][1] = "XDP",
161         [14][1] = "OSS",
162         [15][1] = "SDMA0",
163         [32+0][1] = "VCE0",
164         [32+1][1] = "VCE0U",
165         [32+2][1] = "XDMA",
166         [32+3][1] = "DCE",
167         [32+4][1] = "DCEDWB",
168         [32+5][1] = "MP1",
169         [32+6][1] = "DBGU1",
170         [32+14][1] = "SDMA1",
171 };
172
173 static const char *mmhub_client_ids_vega12[][2] = {
174         [0][0] = "MP0",
175         [1][0] = "VCE0",
176         [2][0] = "VCE0U",
177         [3][0] = "HDP",
178         [13][0] = "UTCL2",
179         [14][0] = "OSS",
180         [15][0] = "SDMA1",
181         [32+0][0] = "DCE",
182         [32+1][0] = "XDMA",
183         [32+2][0] = "UVD",
184         [32+3][0] = "UVDU",
185         [32+4][0] = "MP1",
186         [32+15][0] = "SDMA0",
187         [0][1] = "MP0",
188         [1][1] = "VCE0",
189         [2][1] = "VCE0U",
190         [3][1] = "DBGU0",
191         [4][1] = "HDP",
192         [5][1] = "XDP",
193         [14][1] = "OSS",
194         [15][1] = "SDMA0",
195         [32+0][1] = "DCE",
196         [32+1][1] = "DCEDWB",
197         [32+2][1] = "XDMA",
198         [32+3][1] = "UVD",
199         [32+4][1] = "UVDU",
200         [32+5][1] = "MP1",
201         [32+6][1] = "DBGU1",
202         [32+15][1] = "SDMA1",
203 };
204
205 static const char *mmhub_client_ids_vega20[][2] = {
206         [0][0] = "XDMA",
207         [1][0] = "DCE",
208         [2][0] = "VCE0",
209         [3][0] = "VCE0U",
210         [4][0] = "UVD",
211         [5][0] = "UVD1U",
212         [13][0] = "OSS",
213         [14][0] = "HDP",
214         [15][0] = "SDMA0",
215         [32+0][0] = "UVD",
216         [32+1][0] = "UVDU",
217         [32+2][0] = "MP1",
218         [32+3][0] = "MP0",
219         [32+12][0] = "UTCL2",
220         [32+14][0] = "SDMA1",
221         [0][1] = "XDMA",
222         [1][1] = "DCE",
223         [2][1] = "DCEDWB",
224         [3][1] = "VCE0",
225         [4][1] = "VCE0U",
226         [5][1] = "UVD1",
227         [6][1] = "UVD1U",
228         [7][1] = "DBGU0",
229         [8][1] = "XDP",
230         [13][1] = "OSS",
231         [14][1] = "HDP",
232         [15][1] = "SDMA0",
233         [32+0][1] = "UVD",
234         [32+1][1] = "UVDU",
235         [32+2][1] = "DBGU1",
236         [32+3][1] = "MP1",
237         [32+4][1] = "MP0",
238         [32+14][1] = "SDMA1",
239 };
240
241 static const char *mmhub_client_ids_arcturus[][2] = {
242         [2][0] = "MP1",
243         [3][0] = "MP0",
244         [10][0] = "UTCL2",
245         [13][0] = "OSS",
246         [14][0] = "HDP",
247         [15][0] = "SDMA0",
248         [32+15][0] = "SDMA1",
249         [64+15][0] = "SDMA2",
250         [96+15][0] = "SDMA3",
251         [128+15][0] = "SDMA4",
252         [160+11][0] = "JPEG",
253         [160+12][0] = "VCN",
254         [160+13][0] = "VCNU",
255         [160+15][0] = "SDMA5",
256         [192+10][0] = "UTCL2",
257         [192+11][0] = "JPEG1",
258         [192+12][0] = "VCN1",
259         [192+13][0] = "VCN1U",
260         [192+15][0] = "SDMA6",
261         [224+15][0] = "SDMA7",
262         [0][1] = "DBGU1",
263         [1][1] = "XDP",
264         [2][1] = "MP1",
265         [3][1] = "MP0",
266         [13][1] = "OSS",
267         [14][1] = "HDP",
268         [15][1] = "SDMA0",
269         [32+15][1] = "SDMA1",
270         [64+15][1] = "SDMA2",
271         [96+15][1] = "SDMA3",
272         [128+15][1] = "SDMA4",
273         [160+11][1] = "JPEG",
274         [160+12][1] = "VCN",
275         [160+13][1] = "VCNU",
276         [160+15][1] = "SDMA5",
277         [192+11][1] = "JPEG1",
278         [192+12][1] = "VCN1",
279         [192+13][1] = "VCN1U",
280         [192+15][1] = "SDMA6",
281         [224+15][1] = "SDMA7",
282 };
283
284 static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] =
285 {
286         SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa),
287         SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565)
288 };
289
290 static const struct soc15_reg_golden golden_settings_athub_1_0_0[] =
291 {
292         SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800),
293         SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008)
294 };
295
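/* Per-channel UMC ECC control (and control-mask) register offsets;
 * gmc_v9_0_ecc_interrupt_state() walks both arrays and sets or clears the
 * low seven bits (0x7f) of each register to enable or disable the ECC
 * error interrupts.
 */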
296 static const uint32_t ecc_umc_mcumc_ctrl_addrs[] = {
297         (0x000143c0 + 0x00000000),
298         (0x000143c0 + 0x00000800),
299         (0x000143c0 + 0x00001000),
300         (0x000143c0 + 0x00001800),
301         (0x000543c0 + 0x00000000),
302         (0x000543c0 + 0x00000800),
303         (0x000543c0 + 0x00001000),
304         (0x000543c0 + 0x00001800),
305         (0x000943c0 + 0x00000000),
306         (0x000943c0 + 0x00000800),
307         (0x000943c0 + 0x00001000),
308         (0x000943c0 + 0x00001800),
309         (0x000d43c0 + 0x00000000),
310         (0x000d43c0 + 0x00000800),
311         (0x000d43c0 + 0x00001000),
312         (0x000d43c0 + 0x00001800),
313         (0x001143c0 + 0x00000000),
314         (0x001143c0 + 0x00000800),
315         (0x001143c0 + 0x00001000),
316         (0x001143c0 + 0x00001800),
317         (0x001543c0 + 0x00000000),
318         (0x001543c0 + 0x00000800),
319         (0x001543c0 + 0x00001000),
320         (0x001543c0 + 0x00001800),
321         (0x001943c0 + 0x00000000),
322         (0x001943c0 + 0x00000800),
323         (0x001943c0 + 0x00001000),
324         (0x001943c0 + 0x00001800),
325         (0x001d43c0 + 0x00000000),
326         (0x001d43c0 + 0x00000800),
327         (0x001d43c0 + 0x00001000),
328         (0x001d43c0 + 0x00001800),
329 };
330
331 static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = {
332         (0x000143e0 + 0x00000000),
333         (0x000143e0 + 0x00000800),
334         (0x000143e0 + 0x00001000),
335         (0x000143e0 + 0x00001800),
336         (0x000543e0 + 0x00000000),
337         (0x000543e0 + 0x00000800),
338         (0x000543e0 + 0x00001000),
339         (0x000543e0 + 0x00001800),
340         (0x000943e0 + 0x00000000),
341         (0x000943e0 + 0x00000800),
342         (0x000943e0 + 0x00001000),
343         (0x000943e0 + 0x00001800),
344         (0x000d43e0 + 0x00000000),
345         (0x000d43e0 + 0x00000800),
346         (0x000d43e0 + 0x00001000),
347         (0x000d43e0 + 0x00001800),
348         (0x001143e0 + 0x00000000),
349         (0x001143e0 + 0x00000800),
350         (0x001143e0 + 0x00001000),
351         (0x001143e0 + 0x00001800),
352         (0x001543e0 + 0x00000000),
353         (0x001543e0 + 0x00000800),
354         (0x001543e0 + 0x00001000),
355         (0x001543e0 + 0x00001800),
356         (0x001943e0 + 0x00000000),
357         (0x001943e0 + 0x00000800),
358         (0x001943e0 + 0x00001000),
359         (0x001943e0 + 0x00001800),
360         (0x001d43e0 + 0x00000000),
361         (0x001d43e0 + 0x00000800),
362         (0x001d43e0 + 0x00001000),
363         (0x001d43e0 + 0x00001800),
364 };
365
366 static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
367                 struct amdgpu_irq_src *src,
368                 unsigned type,
369                 enum amdgpu_interrupt_state state)
370 {
371         u32 bits, i, tmp, reg;
372
373         /* Devices newer than VEGA10/12 shall have these programming
374          * sequences performed by the PSP BL */
375         if (adev->asic_type >= CHIP_VEGA20)
376                 return 0;
377
378         bits = 0x7f;
379
380         switch (state) {
381         case AMDGPU_IRQ_STATE_DISABLE:
382                 for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
383                         reg = ecc_umc_mcumc_ctrl_addrs[i];
384                         tmp = RREG32(reg);
385                         tmp &= ~bits;
386                         WREG32(reg, tmp);
387                 }
388                 for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
389                         reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
390                         tmp = RREG32(reg);
391                         tmp &= ~bits;
392                         WREG32(reg, tmp);
393                 }
394                 break;
395         case AMDGPU_IRQ_STATE_ENABLE:
396                 for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_addrs); i++) {
397                         reg = ecc_umc_mcumc_ctrl_addrs[i];
398                         tmp = RREG32(reg);
399                         tmp |= bits;
400                         WREG32(reg, tmp);
401                 }
402                 for (i = 0; i < ARRAY_SIZE(ecc_umc_mcumc_ctrl_mask_addrs); i++) {
403                         reg = ecc_umc_mcumc_ctrl_mask_addrs[i];
404                         tmp = RREG32(reg);
405                         tmp |= bits;
406                         WREG32(reg, tmp);
407                 }
408                 break;
409         default:
410                 break;
411         }
412
413         return 0;
414 }
415
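/* Enable or disable the VM protection-fault interrupt sources by toggling the
 * fault-interrupt bits in the 16 per-context VM_CONTEXT*_CNTL registers of
 * every vmhub (gfxhub and mmhub instances).
 */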
416 static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
417                                         struct amdgpu_irq_src *src,
418                                         unsigned type,
419                                         enum amdgpu_interrupt_state state)
420 {
421         struct amdgpu_vmhub *hub;
422         u32 tmp, reg, bits, i, j;
423
424         bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
425                 VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
426                 VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
427                 VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
428                 VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
429                 VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
430                 VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
431
432         switch (state) {
433         case AMDGPU_IRQ_STATE_DISABLE:
434                 for (j = 0; j < adev->num_vmhubs; j++) {
435                         hub = &adev->vmhub[j];
436                         for (i = 0; i < 16; i++) {
437                                 reg = hub->vm_context0_cntl + i;
438                                 tmp = RREG32(reg);
439                                 tmp &= ~bits;
440                                 WREG32(reg, tmp);
441                         }
442                 }
443                 break;
444         case AMDGPU_IRQ_STATE_ENABLE:
445                 for (j = 0; j < adev->num_vmhubs; j++) {
446                         hub = &adev->vmhub[j];
447                         for (i = 0; i < 16; i++) {
448                                 reg = hub->vm_context0_cntl + i;
449                                 tmp = RREG32(reg);
450                                 tmp |= bits;
451                                 WREG32(reg, tmp);
452                         }
453                 }
454                 break;
455         default:
456                 break;
457         }
458
459         return 0;
460 }
461
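/* VM fault handler: reconstructs the faulting address from the IV ring entry,
 * filters and (if possible) recovers retry faults by filling page tables, and
 * otherwise prints a rate-limited decode of VM_L2_PROTECTION_FAULT_STATUS.
 */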
462 static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
463                                       struct amdgpu_irq_src *source,
464                                       struct amdgpu_iv_entry *entry)
465 {
466         bool retry_fault = !!(entry->src_data[1] & 0x80);
467         uint32_t status = 0, cid = 0, rw = 0;
468         struct amdgpu_task_info task_info;
469         struct amdgpu_vmhub *hub;
470         const char *mmhub_cid;
471         const char *hub_name;
472         u64 addr;
473
474         addr = (u64)entry->src_data[0] << 12;
475         addr |= ((u64)entry->src_data[1] & 0xf) << 44;
476
477         if (retry_fault) {
478                 /* Returning 1 here also prevents sending the IV to the KFD */
479
480                 /* Process it only if it's the first fault for this address */
481                 if (entry->ih != &adev->irq.ih_soft &&
482                     amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
483                                              entry->timestamp))
484                         return 1;
485
486                 /* Delegate it to a different ring if the hardware hasn't
487                  * already done it.
488                  */
489                 if (in_interrupt()) {
490                         amdgpu_irq_delegate(adev, entry, 8);
491                         return 1;
492                 }
493
494                 /* Try to handle the recoverable page faults by filling page
495                  * tables
496                  */
497                 if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
498                         return 1;
499         }
500
501         if (!printk_ratelimit())
502                 return 0;
503
504         if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
505                 hub_name = "mmhub0";
506                 hub = &adev->vmhub[AMDGPU_MMHUB_0];
507         } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
508                 hub_name = "mmhub1";
509                 hub = &adev->vmhub[AMDGPU_MMHUB_1];
510         } else {
511                 hub_name = "gfxhub0";
512                 hub = &adev->vmhub[AMDGPU_GFXHUB_0];
513         }
514
515         memset(&task_info, 0, sizeof(struct amdgpu_task_info));
516         amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
517
518         dev_err(adev->dev,
519                 "[%s] %s page fault (src_id:%u ring:%u vmid:%u "
520                 "pasid:%u, for process %s pid %d thread %s pid %d)\n",
521                 hub_name, retry_fault ? "retry" : "no-retry",
522                 entry->src_id, entry->ring_id, entry->vmid,
523                 entry->pasid, task_info.process_name, task_info.tgid,
524                 task_info.task_name, task_info.pid);
525         dev_err(adev->dev, "  in page starting at address 0x%012llx from client %d\n",
526                 addr, entry->client_id);
527
528         if (amdgpu_sriov_vf(adev))
529                 return 0;
530
531         /*
532          * Issue a dummy read to wait for the status register to
533          * be updated to avoid reading an incorrect value due to
534          * the new fast GRBM interface.
535          */
536         if (entry->vmid_src == AMDGPU_GFXHUB_0)
537                 RREG32(hub->vm_l2_pro_fault_status);
538
539         status = RREG32(hub->vm_l2_pro_fault_status);
540         cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID);
541         rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW);
542         WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
543
544
545         dev_err(adev->dev,
546                 "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
547                 status);
548         if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) {
549                 dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
550                         cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" :
551                         gfxhub_client_ids[cid],
552                         cid);
553         } else {
554                 switch (adev->asic_type) {
555                 case CHIP_VEGA10:
556                         mmhub_cid = mmhub_client_ids_vega10[cid][rw];
557                         break;
558                 case CHIP_VEGA12:
559                         mmhub_cid = mmhub_client_ids_vega12[cid][rw];
560                         break;
561                 case CHIP_VEGA20:
562                         mmhub_cid = mmhub_client_ids_vega20[cid][rw];
563                         break;
564                 case CHIP_ARCTURUS:
565                         mmhub_cid = mmhub_client_ids_arcturus[cid][rw];
566                         break;
567                 case CHIP_RAVEN:
568                         mmhub_cid = mmhub_client_ids_raven[cid][rw];
569                         break;
570                 case CHIP_RENOIR:
571                         mmhub_cid = mmhub_client_ids_renoir[cid][rw];
572                         break;
573                 default:
574                         mmhub_cid = NULL;
575                         break;
576                 }
577                 dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
578                         mmhub_cid ? mmhub_cid : "unknown", cid);
579         }
580         dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
581                 REG_GET_FIELD(status,
582                 VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
583         dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
584                 REG_GET_FIELD(status,
585                 VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
586         dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
587                 REG_GET_FIELD(status,
588                 VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
589         dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
590                 REG_GET_FIELD(status,
591                 VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
592         dev_err(adev->dev, "\t RW: 0x%x\n", rw);
593         return 0;
594 }
595
596 static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
597         .set = gmc_v9_0_vm_fault_interrupt_state,
598         .process = gmc_v9_0_process_interrupt,
599 };
600
601
602 static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = {
603         .set = gmc_v9_0_ecc_interrupt_state,
604         .process = amdgpu_umc_process_ecc_irq,
605 };
606
607 static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
608 {
609         adev->gmc.vm_fault.num_types = 1;
610         adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
611
612         if (!amdgpu_sriov_vf(adev)) {
613                 adev->gmc.ecc_irq.num_types = 1;
614                 adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs;
615         }
616 }
617
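/* Build the VM_INVALIDATE_ENG*_REQ value for a single VMID: the request
 * invalidates the L2 PTE/PDE0/PDE1/PDE2 caches and the L1 PTEs with the
 * requested flush type, without clearing the protection fault status.
 */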
618 static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
619                                         uint32_t flush_type)
620 {
621         u32 req = 0;
622
623         req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
624                             PER_VMID_INVALIDATE_REQ, 1 << vmid);
625         req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
626         req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
627         req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
628         req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
629         req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
630         req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
631         req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
632                             CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
633
634         return req;
635 }
636
637 /**
638  * gmc_v9_0_use_invalidate_semaphore - decide whether the invalidation semaphore is needed
639  *
640  * @adev: amdgpu_device pointer
641  * @vmhub: vmhub type
642  * Returns true only for the MMHUBs on bare metal, and not on Picasso (non-Raven2) APUs.
643  */
644 static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
645                                        uint32_t vmhub)
646 {
647         return ((vmhub == AMDGPU_MMHUB_0 ||
648                  vmhub == AMDGPU_MMHUB_1) &&
649                 (!amdgpu_sriov_vf(adev)) &&
650                 (!(!(adev->apu_flags & AMD_APU_IS_RAVEN2) &&
651                    (adev->apu_flags & AMD_APU_IS_PICASSO))));
652 }
653
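/* Read the ATC VMID->PASID mapping register for @vmid, return the PASID via
 * @p_pasid and report whether the mapping is currently valid.
 */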
654 static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
655                                         uint8_t vmid, uint16_t *p_pasid)
656 {
657         uint32_t value;
658
659         value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
660                      + vmid);
661         *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
662
663         return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
664 }
665
666 /*
667  * GART
668  * VMID 0 is the physical GPU addresses as used by the kernel.
669  * VMIDs 1-15 are used for userspace clients and are handled
670  * by the amdgpu vm/hsa code.
671  */
672
673 /**
674  * gmc_v9_0_flush_gpu_tlb - flush the TLB with a given flush type
675  *
676  * @adev: amdgpu_device pointer
677  * @vmid: vm instance to flush
678  * @vmhub: which hub to flush
679  * @flush_type: the flush type
680  *
681  * Flush the TLB for the requested page table using the given flush type.
682  */
683 static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
684                                         uint32_t vmhub, uint32_t flush_type)
685 {
686         bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
687         const unsigned eng = 17;
688         u32 j, inv_req, inv_req2, tmp;
689         struct amdgpu_vmhub *hub;
690
691         BUG_ON(vmhub >= adev->num_vmhubs);
692
693         hub = &adev->vmhub[vmhub];
694         if (adev->gmc.xgmi.num_physical_nodes &&
695             adev->asic_type == CHIP_VEGA20) {
696                 /* Vega20+XGMI caches PTEs in TC and TLB. Add a
697                  * heavy-weight TLB flush (type 2), which flushes
698                  * both. Due to a race condition with concurrent
699                  * memory accesses using the same TLB cache line, we
700                  * still need a second TLB flush after this.
701                  */
702                 inv_req = gmc_v9_0_get_invalidate_req(vmid, 2);
703                 inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type);
704         } else {
705                 inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
706                 inv_req2 = 0;
707         }
708
709         /* This is necessary for a HW workaround under SRIOV as well
710          * as GFXOFF under bare metal
711          */
712         if (adev->gfx.kiq.ring.sched.ready &&
713             (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
714             down_read_trylock(&adev->reset_sem)) {
715                 uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
716                 uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
717
718                 amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
719                                                    1 << vmid);
720                 up_read(&adev->reset_sem);
721                 return;
722         }
723
724         spin_lock(&adev->gmc.invalidate_lock);
725
726         /*
727          * The GPU may lose the gpuvm invalidate acknowledge state across a
728          * power-gating cycle. Acquire the semaphore before the invalidation
729          * and release it afterwards to avoid entering a power-gated state
730          * while waiting, as a workaround for this issue.
731          */
732
733         /* TODO: debugging of semaphore usage for GFXHUB still needs to continue as well. */
734         if (use_semaphore) {
735                 for (j = 0; j < adev->usec_timeout; j++) {
736                         /* a read return value of 1 means the semaphore was acquired */
737                         tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
738                                             hub->eng_distance * eng);
739                         if (tmp & 0x1)
740                                 break;
741                         udelay(1);
742                 }
743
744                 if (j >= adev->usec_timeout)
745                         DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
746         }
747
748         do {
749                 WREG32_NO_KIQ(hub->vm_inv_eng0_req +
750                               hub->eng_distance * eng, inv_req);
751
752                 /*
753                  * Issue a dummy read to wait for the ACK register to
754                  * be cleared to avoid a false ACK due to the new fast
755                  * GRBM interface.
756                  */
757                 if (vmhub == AMDGPU_GFXHUB_0)
758                         RREG32_NO_KIQ(hub->vm_inv_eng0_req +
759                                       hub->eng_distance * eng);
760
761                 for (j = 0; j < adev->usec_timeout; j++) {
762                         tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
763                                             hub->eng_distance * eng);
764                         if (tmp & (1 << vmid))
765                                 break;
766                         udelay(1);
767                 }
768
769                 inv_req = inv_req2;
770                 inv_req2 = 0;
771         } while (inv_req);
772
773         /* TODO: debugging of semaphore usage for GFXHUB still needs to continue as well. */
774         if (use_semaphore)
775                 /*
776                  * add semaphore release after invalidation,
777                  * write with 0 means semaphore release
778                  */
779                 WREG32_NO_KIQ(hub->vm_inv_eng0_sem +
780                               hub->eng_distance * eng, 0);
781
782         spin_unlock(&adev->gmc.invalidate_lock);
783
784         if (j < adev->usec_timeout)
785                 return;
786
787         DRM_ERROR("Timeout waiting for VM flush ACK!\n");
788 }
789
790 /**
791  * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
792  *
793  * @adev: amdgpu_device pointer
794  * @pasid: pasid to be flushed
795  * @flush_type: the flush type
796  * @all_hub: flush all hubs
797  *
798  * Flush the TLB for the requested pasid.
799  */
800 static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
801                                         uint16_t pasid, uint32_t flush_type,
802                                         bool all_hub)
803 {
804         int vmid, i;
805         signed long r;
806         uint32_t seq;
807         uint16_t queried_pasid;
808         bool ret;
809         struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
810         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
811
812         if (amdgpu_in_reset(adev))
813                 return -EIO;
814
815         if (ring->sched.ready && down_read_trylock(&adev->reset_sem)) {
816                 /* Vega20+XGMI caches PTEs in TC and TLB. Add a
817                  * heavy-weight TLB flush (type 2), which flushes
818                  * both. Due to a race condition with concurrent
819                  * memory accesses using the same TLB cache line, we
820                  * still need a second TLB flush after this.
821                  */
822                 bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes &&
823                                        adev->asic_type == CHIP_VEGA20);
824                 /* 2 dwords flush + 8 dwords fence */
825                 unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8;
826
827                 if (vega20_xgmi_wa)
828                         ndw += kiq->pmf->invalidate_tlbs_size;
829
830                 spin_lock(&adev->gfx.kiq.ring_lock);
831                 /* 2 dwords flush + 8 dwords fence */
832                 amdgpu_ring_alloc(ring, ndw);
833                 if (vega20_xgmi_wa)
834                         kiq->pmf->kiq_invalidate_tlbs(ring,
835                                                       pasid, 2, all_hub);
836                 kiq->pmf->kiq_invalidate_tlbs(ring,
837                                         pasid, flush_type, all_hub);
838                 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
839                 if (r) {
840                         amdgpu_ring_undo(ring);
841                         spin_unlock(&adev->gfx.kiq.ring_lock);
842                         up_read(&adev->reset_sem);
843                         return -ETIME;
844                 }
845
846                 amdgpu_ring_commit(ring);
847                 spin_unlock(&adev->gfx.kiq.ring_lock);
848                 r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
849                 if (r < 1) {
850                         dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
851                         up_read(&adev->reset_sem);
852                         return -ETIME;
853                 }
854                 up_read(&adev->reset_sem);
855                 return 0;
856         }
857
858         for (vmid = 1; vmid < 16; vmid++) {
859
860                 ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
861                                 &queried_pasid);
862                 if (ret && queried_pasid == pasid) {
863                         if (all_hub) {
864                                 for (i = 0; i < adev->num_vmhubs; i++)
865                                         gmc_v9_0_flush_gpu_tlb(adev, vmid,
866                                                         i, flush_type);
867                         } else {
868                                 gmc_v9_0_flush_gpu_tlb(adev, vmid,
869                                                 AMDGPU_GFXHUB_0, flush_type);
870                         }
871                         break;
872                 }
873         }
874
875         return 0;
876
877 }
878
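/* Ring-based variant of the TLB flush: programs the per-VMID page directory
 * base and emits the invalidation request/ack (plus the optional semaphore
 * acquire/release workaround) as commands on @ring instead of writing the
 * registers directly.
 */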
879 static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
880                                             unsigned vmid, uint64_t pd_addr)
881 {
882         bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
883         struct amdgpu_device *adev = ring->adev;
884         struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
885         uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
886         unsigned eng = ring->vm_inv_eng;
887
888         /*
889          * The GPU may lose the gpuvm invalidate acknowledge state across a
890          * power-gating cycle. Acquire the semaphore before the invalidation
891          * and release it afterwards to avoid entering a power-gated state
892          * while waiting, as a workaround for this issue.
893          */
894
895         /* TODO: debugging of semaphore usage for GFXHUB still needs to continue as well. */
896         if (use_semaphore)
897                 /* a read return value of 1 means the semaphore was acquired */
898                 amdgpu_ring_emit_reg_wait(ring,
899                                           hub->vm_inv_eng0_sem +
900                                           hub->eng_distance * eng, 0x1, 0x1);
901
902         amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
903                               (hub->ctx_addr_distance * vmid),
904                               lower_32_bits(pd_addr));
905
906         amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
907                               (hub->ctx_addr_distance * vmid),
908                               upper_32_bits(pd_addr));
909
910         amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
911                                             hub->eng_distance * eng,
912                                             hub->vm_inv_eng0_ack +
913                                             hub->eng_distance * eng,
914                                             req, 1 << vmid);
915
916         /* TODO: debugging of semaphore usage for GFXHUB still needs to continue as well. */
917         if (use_semaphore)
918                 /*
919                  * add semaphore release after invalidation,
920                  * write with 0 means semaphore release
921                  */
922                 amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
923                                       hub->eng_distance * eng, 0);
924
925         return pd_addr;
926 }
927
928 static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
929                                         unsigned pasid)
930 {
931         struct amdgpu_device *adev = ring->adev;
932         uint32_t reg;
933
934         /* Do nothing because there's no lut register for mmhub1. */
935         if (ring->funcs->vmhub == AMDGPU_MMHUB_1)
936                 return;
937
938         if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
939                 reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
940         else
941                 reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
942
943         amdgpu_ring_emit_wreg(ring, reg, pasid);
944 }
945
946 /*
947  * PTE format on VEGA 10:
948  * 63:59 reserved
949  * 58:57 mtype
950  * 56 F
951  * 55 L
952  * 54 P
953  * 53 SW
954  * 52 T
955  * 50:48 reserved
956  * 47:12 4k physical page base address
957  * 11:7 fragment
958  * 6 write
959  * 5 read
960  * 4 exe
961  * 3 Z
962  * 2 snooped
963  * 1 system
964  * 0 valid
965  *
966  * PDE format on VEGA 10:
967  * 63:59 block fragment size
968  * 58:55 reserved
969  * 54 P
970  * 53:48 reserved
971  * 47:6 physical base address of PD or PTE
972  * 5:3 reserved
973  * 2 C
974  * 1 system
975  * 0 valid
976  */
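/* For illustration, combining the PTE bits above: a valid, snooped system page
 * with read and write permission sets bits 0, 1, 2, 5 and 6, so the low byte
 * of such a PTE would be 0x67 (before the physical page base and fragment
 * fields are filled in).
 */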
977
978 static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
979
980 {
981         switch (flags) {
982         case AMDGPU_VM_MTYPE_DEFAULT:
983                 return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
984         case AMDGPU_VM_MTYPE_NC:
985                 return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
986         case AMDGPU_VM_MTYPE_WC:
987                 return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
988         case AMDGPU_VM_MTYPE_RW:
989                 return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
990         case AMDGPU_VM_MTYPE_CC:
991                 return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
992         case AMDGPU_VM_MTYPE_UC:
993                 return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
994         default:
995                 return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
996         }
997 }
998
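/* Adjust a PDE before it is written: VRAM addresses are rebased into the MC
 * address space; with translate_further enabled, PDB1 entries get a block
 * fragment size of 9 (i.e. 2MB assuming 4K base pages), and at PDB0 level
 * huge-page entries drop the PDE flag while normal PDEs get the
 * translate-further bit.
 */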
999 static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
1000                                 uint64_t *addr, uint64_t *flags)
1001 {
1002         if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
1003                 *addr = adev->vm_manager.vram_base_offset + *addr -
1004                         adev->gmc.vram_start;
1005         BUG_ON(*addr & 0xFFFF00000000003FULL);
1006
1007         if (!adev->gmc.translate_further)
1008                 return;
1009
1010         if (level == AMDGPU_VM_PDB1) {
1011                 /* Set the block fragment size */
1012                 if (!(*flags & AMDGPU_PDE_PTE))
1013                         *flags |= AMDGPU_PDE_BFS(0x9);
1014
1015         } else if (level == AMDGPU_VM_PDB0) {
1016                 if (*flags & AMDGPU_PDE_PTE)
1017                         *flags &= ~AMDGPU_PDE_PTE;
1018                 else
1019                         *flags |= AMDGPU_PTE_TF;
1020         }
1021 }
1022
1023 static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
1024                                 struct amdgpu_bo_va_mapping *mapping,
1025                                 uint64_t *flags)
1026 {
1027         *flags &= ~AMDGPU_PTE_EXECUTABLE;
1028         *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
1029
1030         *flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
1031         *flags |= mapping->flags & AMDGPU_PTE_MTYPE_VG10_MASK;
1032
1033         if (mapping->flags & AMDGPU_PTE_PRT) {
1034                 *flags |= AMDGPU_PTE_PRT;
1035                 *flags &= ~AMDGPU_PTE_VALID;
1036         }
1037
1038         if (adev->asic_type == CHIP_ARCTURUS &&
1039             !(*flags & AMDGPU_PTE_SYSTEM) &&
1040             mapping->bo_va->is_xgmi)
1041                 *flags |= AMDGPU_PTE_SNOOPED;
1042 }
1043
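/* Estimate how much VRAM the VBIOS/pre-OS console is scanning out: the fixed
 * VGA allocation when D1VGA mode is enabled, otherwise the active viewport
 * dimensions times 4 bytes per pixel (HUBP registers on the DCN based APUs,
 * SCL registers on the DCE based Vegas).
 */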
1044 static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
1045 {
1046         u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
1047         unsigned size;
1048
1049         if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
1050                 size = AMDGPU_VBIOS_VGA_ALLOCATION;
1051         } else {
1052                 u32 viewport;
1053
1054                 switch (adev->asic_type) {
1055                 case CHIP_RAVEN:
1056                 case CHIP_RENOIR:
1057                         viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
1058                         size = (REG_GET_FIELD(viewport,
1059                                               HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
1060                                 REG_GET_FIELD(viewport,
1061                                               HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_WIDTH) *
1062                                 4);
1063                         break;
1064                 case CHIP_VEGA10:
1065                 case CHIP_VEGA12:
1066                 case CHIP_VEGA20:
1067                 default:
1068                         viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE);
1069                         size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) *
1070                                 REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_WIDTH) *
1071                                 4);
1072                         break;
1073                 }
1074         }
1075
1076         return size;
1077 }
1078
1079 static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
1080         .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
1081         .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
1082         .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
1083         .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
1084         .map_mtype = gmc_v9_0_map_mtype,
1085         .get_vm_pde = gmc_v9_0_get_vm_pde,
1086         .get_vm_pte = gmc_v9_0_get_vm_pte,
1087         .get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
1088 };
1089
1090 static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
1091 {
1092         adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
1093 }
1094
1095 static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
1096 {
1097         switch (adev->asic_type) {
1098         case CHIP_VEGA10:
1099                 adev->umc.funcs = &umc_v6_0_funcs;
1100                 break;
1101         case CHIP_VEGA20:
1102                 adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
1103                 adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
1104                 adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
1105                 adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20;
1106                 adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
1107                 adev->umc.funcs = &umc_v6_1_funcs;
1108                 break;
1109         case CHIP_ARCTURUS:
1110                 adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM;
1111                 adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM;
1112                 adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM;
1113                 adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT;
1114                 adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0];
1115                 adev->umc.funcs = &umc_v6_1_funcs;
1116                 break;
1117         default:
1118                 break;
1119         }
1120 }
1121
1122 static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev)
1123 {
1124         switch (adev->asic_type) {
1125         case CHIP_ARCTURUS:
1126                 adev->mmhub.funcs = &mmhub_v9_4_funcs;
1127                 break;
1128         default:
1129                 adev->mmhub.funcs = &mmhub_v1_0_funcs;
1130                 break;
1131         }
1132 }
1133
1134 static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
1135 {
1136         adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
1137 }
1138
1139 static int gmc_v9_0_early_init(void *handle)
1140 {
1141         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1142
1143         gmc_v9_0_set_gmc_funcs(adev);
1144         gmc_v9_0_set_irq_funcs(adev);
1145         gmc_v9_0_set_umc_funcs(adev);
1146         gmc_v9_0_set_mmhub_funcs(adev);
1147         gmc_v9_0_set_gfxhub_funcs(adev);
1148
1149         adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
1150         adev->gmc.shared_aperture_end =
1151                 adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
1152         adev->gmc.private_aperture_start = 0x1000000000000000ULL;
1153         adev->gmc.private_aperture_end =
1154                 adev->gmc.private_aperture_start + (4ULL << 30) - 1;
1155
1156         return 0;
1157 }
1158
1159 static int gmc_v9_0_late_init(void *handle)
1160 {
1161         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1162         int r;
1163
1164         r = amdgpu_gmc_allocate_vm_inv_eng(adev);
1165         if (r)
1166                 return r;
1167
1168         /*
1169          * Workaround for a performance drop when the VBIOS enables partial
1170          * writes while disabling HBM ECC on Vega10.
1171          */
1172         if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) {
1173                 if (!(adev->ras_features & (1 << AMDGPU_RAS_BLOCK__UMC))) {
1174                         if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
1175                                 adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
1176                 }
1177         }
1178
1179         if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
1180                 adev->mmhub.funcs->reset_ras_error_count(adev);
1181
1182         r = amdgpu_gmc_ras_late_init(adev);
1183         if (r)
1184                 return r;
1185
1186         return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
1187 }
1188
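/* Place the VRAM, GART and AGP apertures in the GPU's internal MC address
 * space. On XGMI systems the per-node segment offset is added to both the
 * frame buffer base and vram_base_offset so each node sees its own segment.
 */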
1189 static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
1190                                         struct amdgpu_gmc *mc)
1191 {
1192         u64 base = 0;
1193
1194         if (!amdgpu_sriov_vf(adev))
1195                 base = adev->mmhub.funcs->get_fb_location(adev);
1196
1197         /* add the xgmi offset of the physical node */
1198         base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
1199         amdgpu_gmc_vram_location(adev, mc, base);
1200         amdgpu_gmc_gart_location(adev, mc);
1201         amdgpu_gmc_agp_location(adev, mc);
1202         /* base offset of vram pages */
1203         adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
1204
1205         /* XXX: add the xgmi offset of the physical node? */
1206         adev->vm_manager.vram_base_offset +=
1207                 adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
1208 }
1209
1210 /**
1211  * gmc_v9_0_mc_init - initialize the memory controller driver params
1212  *
1213  * @adev: amdgpu_device pointer
1214  *
1215  * Look up the amount of vram, vram width, and decide how to place
1216  * vram and gart within the GPU's physical address space.
1217  * Returns 0 for success.
1218  */
1219 static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
1220 {
1221         int r;
1222
1223         /* get_memsize() reports the VRAM size in MB; convert it to bytes */
1224         adev->gmc.mc_vram_size =
1225                 adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
1226         adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
1227
1228         if (!(adev->flags & AMD_IS_APU)) {
1229                 r = amdgpu_device_resize_fb_bar(adev);
1230                 if (r)
1231                         return r;
1232         }
1233         adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
1234         adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
1235
1236 #ifdef CONFIG_X86_64
1237         if (adev->flags & AMD_IS_APU) {
1238                 adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev);
1239                 adev->gmc.aper_size = adev->gmc.real_vram_size;
1240         }
1241 #endif
1242         /* In case the PCI BAR is larger than the actual amount of vram */
1243         adev->gmc.visible_vram_size = adev->gmc.aper_size;
1244         if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
1245                 adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
1246
1247         /* set the gart size */
1248         if (amdgpu_gart_size == -1) {
1249                 switch (adev->asic_type) {
1250                 case CHIP_VEGA10:  /* all engines support GPUVM */
1251                 case CHIP_VEGA12:  /* all engines support GPUVM */
1252                 case CHIP_VEGA20:
1253                 case CHIP_ARCTURUS:
1254                 default:
1255                         adev->gmc.gart_size = 512ULL << 20;
1256                         break;
1257                 case CHIP_RAVEN:   /* DCE SG support */
1258                 case CHIP_RENOIR:
1259                         adev->gmc.gart_size = 1024ULL << 20;
1260                         break;
1261                 }
1262         } else {
1263                 adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
1264         }
1265
1266         gmc_v9_0_vram_gtt_location(adev, &adev->gmc);
1267
1268         return 0;
1269 }
1270
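/* Set up the GART: initialize the common gart structure, size the page table
 * (8 bytes per GPU page) and allocate it in VRAM; GART PTEs default to
 * uncached (MTYPE_UC) and executable.
 */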
1271 static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
1272 {
1273         int r;
1274
1275         if (adev->gart.bo) {
1276                 WARN(1, "VEGA10 PCIE GART already initialized\n");
1277                 return 0;
1278         }
1279         /* Initialize common gart structure */
1280         r = amdgpu_gart_init(adev);
1281         if (r)
1282                 return r;
1283         adev->gart.table_size = adev->gart.num_gpu_pages * 8;
1284         adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
1285                                  AMDGPU_PTE_EXECUTABLE;
1286         return amdgpu_gart_table_vram_alloc(adev);
1287 }
1288
1289 /**
1290  * gmc_v9_0_save_registers - saves regs
1291  *
1292  * @adev: amdgpu_device pointer
1293  *
1294  * This saves potential register values that should be
1295  * restored upon resume
1296  */
1297 static void gmc_v9_0_save_registers(struct amdgpu_device *adev)
1298 {
1299         if (adev->asic_type == CHIP_RAVEN)
1300                 adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0);
1301 }
1302
1303 static int gmc_v9_0_sw_init(void *handle)
1304 {
1305         int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
1306         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1307
1308         adev->gfxhub.funcs->init(adev);
1309
1310         adev->mmhub.funcs->init(adev);
1311
1312         spin_lock_init(&adev->gmc.invalidate_lock);
1313
1314         r = amdgpu_atomfirmware_get_vram_info(adev,
1315                 &vram_width, &vram_type, &vram_vendor);
1316         if (amdgpu_sriov_vf(adev))
1317                 /* For Vega10 SR-IOV, vram_width can't be read from ATOM as on RAVEN,
1318                  * and the DF related registers are not readable; hardcoding seems to be
1319                  * the only way to set the correct vram_width
1320                  */
1321                 adev->gmc.vram_width = 2048;
1322         else if (amdgpu_emu_mode != 1)
1323                 adev->gmc.vram_width = vram_width;
1324
1325         if (!adev->gmc.vram_width) {
1326                 int chansize, numchan;
1327
1328                 /* hbm memory channel size */
1329                 if (adev->flags & AMD_IS_APU)
1330                         chansize = 64;
1331                 else
1332                         chansize = 128;
1333
1334                 numchan = adev->df.funcs->get_hbm_channel_number(adev);
1335                 adev->gmc.vram_width = numchan * chansize;
1336         }
1337
1338         adev->gmc.vram_type = vram_type;
1339         adev->gmc.vram_vendor = vram_vendor;
1340         switch (adev->asic_type) {
1341         case CHIP_RAVEN:
1342                 adev->num_vmhubs = 2;
1343
1344                 if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
1345                         amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
1346                 } else {
1347                         /* vm_size is 128TB + 512GB for legacy 3-level page support */
1348                         amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
1349                         adev->gmc.translate_further =
1350                                 adev->vm_manager.num_level > 1;
1351                 }
1352                 break;
1353         case CHIP_VEGA10:
1354         case CHIP_VEGA12:
1355         case CHIP_VEGA20:
1356         case CHIP_RENOIR:
1357                 adev->num_vmhubs = 2;
1358
1359
1360                 /*
1361                  * For 4-level page table support the vm size is 256TB
1362                  * (48 bit), the maximum for Vega10, with a block size
1363                  * of 512 (9 bit).
1364                  */
1365                 /* SRIOV restricts max_pfn to below AMDGPU_GMC_HOLE */
1366                 if (amdgpu_sriov_vf(adev))
1367                         amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47);
1368                 else
1369                         amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
1370                 break;
1371         case CHIP_ARCTURUS:
1372                 adev->num_vmhubs = 3;
1373
1374                 /* Keep the vm size same with Vega20 */
1375                 amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
1376                 break;
1377         default:
1378                 break;
1379         }
1380
1381         /* This interrupt is the VMC page fault. */
1382         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,
1383                                 &adev->gmc.vm_fault);
1384         if (r)
1385                 return r;
1386
1387         if (adev->asic_type == CHIP_ARCTURUS) {
1388                 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT,
1389                                         &adev->gmc.vm_fault);
1390                 if (r)
1391                         return r;
1392         }
1393
1394         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
1395                                 &adev->gmc.vm_fault);
1396
1397         if (r)
1398                 return r;
1399
1400         if (!amdgpu_sriov_vf(adev)) {
1401                 /* interrupt sent to DF. */
1402                 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
1403                                       &adev->gmc.ecc_irq);
1404                 if (r)
1405                         return r;
1406         }
1407
1408         /* Set the internal MC address mask
1409          * This is the max address of the GPU's
1410          * internal address space.
1411          */
1412         adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
1413
1414         r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
1415         if (r) {
1416                 printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
1417                 return r;
1418         }
1419         adev->need_swiotlb = drm_need_swiotlb(44);
1420
1421         if (adev->gmc.xgmi.supported) {
1422                 r = adev->gfxhub.funcs->get_xgmi_info(adev);
1423                 if (r)
1424                         return r;
1425         }
1426
1427         r = gmc_v9_0_mc_init(adev);
1428         if (r)
1429                 return r;
1430
1431         amdgpu_gmc_get_vbios_allocations(adev);
1432
1433         /* Memory manager */
1434         r = amdgpu_bo_init(adev);
1435         if (r)
1436                 return r;
1437
1438         r = gmc_v9_0_gart_init(adev);
1439         if (r)
1440                 return r;
1441
1442         /*
1443          * number of VMs
1444          * VMID 0 is reserved for System
1445          * amdgpu graphics/compute will use VMIDs 1..n-1
1446          * amdkfd will use VMIDs n..15
1447          *
1448          * The first KFD VMID is 8 for GPUs with graphics, 3 for
1449          * compute-only GPUs. On compute-only GPUs that leaves 2 VMIDs
1450          * for video processing.
1451          */
1452         adev->vm_manager.first_kfd_vmid =
1453                 adev->asic_type == CHIP_ARCTURUS ? 3 : 8;
1454
1455         amdgpu_vm_manager_init(adev);
1456
1457         gmc_v9_0_save_registers(adev);
1458
1459         return 0;
1460 }
1461
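/**
 * gmc_v9_0_sw_fini - tear down driver software state
 *
 * @handle: amdgpu_device pointer
 *
 * Releases what sw_init set up: RAS state, leftover GEM objects, the
 * VM manager, the GART table in VRAM and the buffer object backend.
 */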
1462 static int gmc_v9_0_sw_fini(void *handle)
1463 {
1464         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1465
1466         amdgpu_gmc_ras_fini(adev);
1467         amdgpu_gem_force_release(adev);
1468         amdgpu_vm_manager_fini(adev);
1469         amdgpu_gart_table_vram_free(adev);
1470         amdgpu_bo_fini(adev);
1471         amdgpu_gart_fini(adev);
1472
1473         return 0;
1474 }
1475
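/**
 * gmc_v9_0_init_golden_registers - program golden register settings
 *
 * @adev: amdgpu_device pointer
 *
 * Applies the per-ASIC MMHUB and ATHUB golden register sequences.
 */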
1476 static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
1477 {
1479         switch (adev->asic_type) {
1480         case CHIP_VEGA10:
1481                 if (amdgpu_sriov_vf(adev))
1482                         break;
1483                 fallthrough;
1484         case CHIP_VEGA20:
1485                 soc15_program_register_sequence(adev,
1486                                                 golden_settings_mmhub_1_0_0,
1487                                                 ARRAY_SIZE(golden_settings_mmhub_1_0_0));
1488                 soc15_program_register_sequence(adev,
1489                                                 golden_settings_athub_1_0_0,
1490                                                 ARRAY_SIZE(golden_settings_athub_1_0_0));
1491                 break;
1492         case CHIP_VEGA12:
1493                 break;
1494         case CHIP_RAVEN:
1495                 /* TODO for renoir */
1496                 soc15_program_register_sequence(adev,
1497                                                 golden_settings_athub_1_0_0,
1498                                                 ARRAY_SIZE(golden_settings_athub_1_0_0));
1499                 break;
1500         default:
1501                 break;
1502         }
1503 }
1504
1505 /**
1506  * gmc_v9_0_restore_registers - restores regs
1507  *
1508  * @adev: amdgpu_device pointer
1509  *
1510  * This restores the register values that were saved at suspend.
1511  */
1512 void gmc_v9_0_restore_registers(struct amdgpu_device *adev)
1513 {
1514         if (adev->asic_type == CHIP_RAVEN) {
1515                 WREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register);
1516                 WARN_ON(adev->gmc.sdpif_register !=
1517                         RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0));
1518         }
1519 }
1520
1521 /**
1522  * gmc_v9_0_gart_enable - gart enable
1523  *
1524  * @adev: amdgpu_device pointer
 *
 * Pins the GART table in VRAM and enables GART translation on the
 * GFX and MM hubs.
1525  */
1526 static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
1527 {
1528         int r;
1529
1530         if (!adev->gart.bo) {
1531                 dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
1532                 return -EINVAL;
1533         }
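        /* Pin the GART table in VRAM so its GPU address stays valid. */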
1534         r = amdgpu_gart_table_vram_pin(adev);
1535         if (r)
1536                 return r;
1537
1538         r = adev->gfxhub.funcs->gart_enable(adev);
1539         if (r)
1540                 return r;
1541
1542         r = adev->mmhub.funcs->gart_enable(adev);
1543         if (r)
1544                 return r;
1545
1546         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
1547                  (unsigned)(adev->gmc.gart_size >> 20),
1548                  (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
1549         adev->gart.ready = true;
1550         return 0;
1551 }
1552
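/**
 * gmc_v9_0_hw_init - initialize the memory controller hardware
 *
 * @handle: amdgpu_device pointer
 *
 * Programs golden registers, disables VGA access, initializes HDP,
 * sets the default VM fault behaviour, flushes the VM hub TLBs and
 * enables the GART.
 */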
1553 static int gmc_v9_0_hw_init(void *handle)
1554 {
1555         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1556         bool value;
1557         int r, i;
1558
1559         /* The sequence of these two function calls matters. */
1560         gmc_v9_0_init_golden_registers(adev);
1561
1562         if (adev->mode_info.num_crtc) {
1563                 /* Lock out access through the VGA aperture */
1564                 WREG32_FIELD15(DCE, 0, VGA_HDP_CONTROL, VGA_MEMORY_DISABLE, 1);
1565                 /* disable VGA render */
1566                 WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0);
1567         }
1568
1569         if (adev->mmhub.funcs->update_power_gating)
1570                 adev->mmhub.funcs->update_power_gating(adev, true);
1571
1572         adev->hdp.funcs->init_registers(adev);
1573
1574         /* After HDP is initialized, flush HDP. */
1575         adev->hdp.funcs->flush_hdp(adev, NULL);
1576
1577         if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
1578                 value = false;
1579         else
1580                 value = true;
1581
1582         if (!amdgpu_sriov_vf(adev)) {
1583                 adev->gfxhub.funcs->set_fault_enable_default(adev, value);
1584                 adev->mmhub.funcs->set_fault_enable_default(adev, value);
1585         }
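        /* Flush the VMID 0 TLB on every VM hub. */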
1586         for (i = 0; i < adev->num_vmhubs; ++i)
1587                 gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);
1588
1589         if (adev->umc.funcs && adev->umc.funcs->init_registers)
1590                 adev->umc.funcs->init_registers(adev);
1591
1592         r = gmc_v9_0_gart_enable(adev);
1593
1594         return r;
1595 }
1596
1597 /**
1598  * gmc_v9_0_gart_disable - gart disable
1599  *
1600  * @adev: amdgpu_device pointer
1601  *
1602  * This disables all VM page tables.
1603  */
1604 static void gmc_v9_0_gart_disable(struct amdgpu_device *adev)
1605 {
1606         adev->gfxhub.funcs->gart_disable(adev);
1607         adev->mmhub.funcs->gart_disable(adev);
1608         amdgpu_gart_table_vram_unpin(adev);
1609 }
1610
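/**
 * gmc_v9_0_hw_fini - tear down the memory controller hardware
 *
 * @handle: amdgpu_device pointer
 *
 * Releases the VM fault and ECC interrupts and disables the GART.
 * Skipped under SRIOV, where the guest must not touch GMC registers.
 */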
1611 static int gmc_v9_0_hw_fini(void *handle)
1612 {
1613         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1614
1615         if (amdgpu_sriov_vf(adev)) {
1616                 /* full access mode, so don't touch any GMC register */
1617                 DRM_DEBUG("For SRIOV client, skipping GMC hw_fini.\n");
1618                 return 0;
1619         }
1620
1621         amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
1622         amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
1623         gmc_v9_0_gart_disable(adev);
1624
1625         return 0;
1626 }
1627
1628 static int gmc_v9_0_suspend(void *handle)
1629 {
1630         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1631
1632         return gmc_v9_0_hw_fini(adev);
1633 }
1634
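/*
 * Resume re-runs hw_init and then resets all VMIDs so no stale GPUVM
 * assignments survive across suspend/resume.
 */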
1635 static int gmc_v9_0_resume(void *handle)
1636 {
1637         int r;
1638         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1639
1640         r = gmc_v9_0_hw_init(adev);
1641         if (r)
1642                 return r;
1643
1644         amdgpu_vmid_reset_all(adev);
1645
1646         return 0;
1647 }
1648
1649 static bool gmc_v9_0_is_idle(void *handle)
1650 {
1651         /* MC is always ready in GMC v9. */
1652         return true;
1653 }
1654
1655 static int gmc_v9_0_wait_for_idle(void *handle)
1656 {
1657         /* There is no need to wait for MC idle in GMC v9. */
1658         return 0;
1659 }
1660
1661 static int gmc_v9_0_soft_reset(void *handle)
1662 {
1663         /* XXX for emulation. */
1664         return 0;
1665 }
1666
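/* GMC v9 clock gating is implemented in the MMHUB and ATHUB blocks. */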
1667 static int gmc_v9_0_set_clockgating_state(void *handle,
1668                                         enum amd_clockgating_state state)
1669 {
1670         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1671
1672         adev->mmhub.funcs->set_clockgating(adev, state);
1673
1674         athub_v1_0_set_clockgating(adev, state);
1675
1676         return 0;
1677 }
1678
1679 static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
1680 {
1681         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1682
1683         adev->mmhub.funcs->get_clockgating(adev, flags);
1684
1685         athub_v1_0_get_clockgating(adev, flags);
1686 }
1687
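/* There is no GMC-specific power gating to configure. */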
1688 static int gmc_v9_0_set_powergating_state(void *handle,
1689                                         enum amd_powergating_state state)
1690 {
1691         return 0;
1692 }
1693
1694 const struct amd_ip_funcs gmc_v9_0_ip_funcs = {
1695         .name = "gmc_v9_0",
1696         .early_init = gmc_v9_0_early_init,
1697         .late_init = gmc_v9_0_late_init,
1698         .sw_init = gmc_v9_0_sw_init,
1699         .sw_fini = gmc_v9_0_sw_fini,
1700         .hw_init = gmc_v9_0_hw_init,
1701         .hw_fini = gmc_v9_0_hw_fini,
1702         .suspend = gmc_v9_0_suspend,
1703         .resume = gmc_v9_0_resume,
1704         .is_idle = gmc_v9_0_is_idle,
1705         .wait_for_idle = gmc_v9_0_wait_for_idle,
1706         .soft_reset = gmc_v9_0_soft_reset,
1707         .set_clockgating_state = gmc_v9_0_set_clockgating_state,
1708         .set_powergating_state = gmc_v9_0_set_powergating_state,
1709         .get_clockgating_state = gmc_v9_0_get_clockgating_state,
1710 };
1711
1712 const struct amdgpu_ip_block_version gmc_v9_0_ip_block =
1713 {
1714         .type = AMD_IP_BLOCK_TYPE_GMC,
1715         .major = 9,
1716         .minor = 0,
1717         .rev = 0,
1718         .funcs = &gmc_v9_0_ip_funcs,
1719 };