1 /*
2  * Copyright 2019 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include <linux/pci.h>
25 #include "amdgpu.h"
26 #include "amdgpu_atomfirmware.h"
27 #include "gmc_v10_0.h"
28 #include "umc_v8_7.h"
29
30 #include "hdp/hdp_5_0_0_offset.h"
31 #include "hdp/hdp_5_0_0_sh_mask.h"
32 #include "athub/athub_2_0_0_sh_mask.h"
33 #include "athub/athub_2_0_0_offset.h"
34 #include "dcn/dcn_2_0_0_offset.h"
35 #include "dcn/dcn_2_0_0_sh_mask.h"
36 #include "oss/osssys_5_0_0_offset.h"
37 #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
38 #include "navi10_enum.h"
39
40 #include "soc15.h"
41 #include "soc15d.h"
42 #include "soc15_common.h"
43
44 #include "nbio_v2_3.h"
45
46 #include "gfxhub_v2_0.h"
47 #include "gfxhub_v2_1.h"
48 #include "mmhub_v2_0.h"
49 #include "mmhub_v2_3.h"
50 #include "athub_v2_0.h"
51 #include "athub_v2_1.h"
52
53 #if 0
54 static const struct soc15_reg_golden golden_settings_navi10_hdp[] =
55 {
56         /* TODO add golden setting for hdp */
57 };
58 #endif
59
60 static int gmc_v10_0_ecc_interrupt_state(struct amdgpu_device *adev,
61                                          struct amdgpu_irq_src *src,
62                                          unsigned type,
63                                          enum amdgpu_interrupt_state state)
64 {
65         return 0;
66 }
67
68 static int
69 gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
70                                    struct amdgpu_irq_src *src, unsigned type,
71                                    enum amdgpu_interrupt_state state)
72 {
73         switch (state) {
74         case AMDGPU_IRQ_STATE_DISABLE:
75                 /* MM HUB */
76                 amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, false);
77                 /* GFX HUB */
78                 amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, false);
79                 break;
80         case AMDGPU_IRQ_STATE_ENABLE:
81                 /* MM HUB */
82                 amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, true);
83                 /* GFX HUB */
84                 amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, true);
85                 break;
86         default:
87                 break;
88         }
89
90         return 0;
91 }
92
93 static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
94                                        struct amdgpu_irq_src *source,
95                                        struct amdgpu_iv_entry *entry)
96 {
97         bool retry_fault = !!(entry->src_data[1] & 0x80);
98         struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src];
99         struct amdgpu_task_info task_info;
100         uint32_t status = 0;
101         u64 addr;
102
103         addr = (u64)entry->src_data[0] << 12;
104         addr |= ((u64)entry->src_data[1] & 0xf) << 44;
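                /*
                 * Illustrative example (hypothetical IV payload, not taken from
                 * real hardware): src_data[0] = 0x00012345 and src_data[1] & 0xf = 0x3
                 * reassemble into the 48-bit fault address
                 * (0x3ULL << 44) | (0x12345ULL << 12) = 0x300012345000.
                 */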
105
106         if (retry_fault) {
107                 /* Returning 1 here also prevents sending the IV to the KFD */
108
109                 /* Process it only if it's the first fault for this address */
110                 if (entry->ih != &adev->irq.ih_soft &&
111                     amdgpu_gmc_filter_faults(adev, addr, entry->pasid,
112                                              entry->timestamp))
113                         return 1;
114
115                 /* Delegate it to a different ring if the hardware hasn't
116                  * already done it.
117                  */
118                 if (in_interrupt()) {
119                         amdgpu_irq_delegate(adev, entry, 8);
120                         return 1;
121                 }
122
123                 /* Try to handle the recoverable page faults by filling page
124                  * tables
125                  */
126                 if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
127                         return 1;
128         }
129
130         if (!amdgpu_sriov_vf(adev)) {
131                 /*
132                  * Issue a dummy read to wait for the status register to
133                  * be updated to avoid reading an incorrect value due to
134                  * the new fast GRBM interface.
135                  */
136                 if ((entry->vmid_src == AMDGPU_GFXHUB_0) &&
137                     (adev->asic_type < CHIP_SIENNA_CICHLID))
138                         RREG32(hub->vm_l2_pro_fault_status);
139
140                 status = RREG32(hub->vm_l2_pro_fault_status);
141                 WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
142         }
143
144         if (!printk_ratelimit())
145                 return 0;
146
147         memset(&task_info, 0, sizeof(struct amdgpu_task_info));
148         amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
149
150         dev_err(adev->dev,
151                 "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, "
152                 "for process %s pid %d thread %s pid %d)\n",
153                 entry->vmid_src ? "mmhub" : "gfxhub",
154                 entry->src_id, entry->ring_id, entry->vmid,
155                 entry->pasid, task_info.process_name, task_info.tgid,
156                 task_info.task_name, task_info.pid);
157         dev_err(adev->dev, "  in page starting at address 0x%012llx from client %d\n",
158                 addr, entry->client_id);
159
160         if (!amdgpu_sriov_vf(adev))
161                 hub->vmhub_funcs->print_l2_protection_fault_status(adev,
162                                                                    status);
163
164         return 0;
165 }
166
167 static const struct amdgpu_irq_src_funcs gmc_v10_0_irq_funcs = {
168         .set = gmc_v10_0_vm_fault_interrupt_state,
169         .process = gmc_v10_0_process_interrupt,
170 };
171
172 static const struct amdgpu_irq_src_funcs gmc_v10_0_ecc_funcs = {
173         .set = gmc_v10_0_ecc_interrupt_state,
174         .process = amdgpu_umc_process_ecc_irq,
175 };
176
177 static void gmc_v10_0_set_irq_funcs(struct amdgpu_device *adev)
178 {
179         adev->gmc.vm_fault.num_types = 1;
180         adev->gmc.vm_fault.funcs = &gmc_v10_0_irq_funcs;
181
182         if (!amdgpu_sriov_vf(adev)) {
183                 adev->gmc.ecc_irq.num_types = 1;
184                 adev->gmc.ecc_irq.funcs = &gmc_v10_0_ecc_funcs;
185         }
186 }
187
188 /**
189  * gmc_v10_0_use_invalidate_semaphore - check whether to use the invalidation semaphore
190  *
191  * @adev: amdgpu_device pointer
192  * @vmhub: vmhub type
193  *
194  */
195 static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
196                                        uint32_t vmhub)
197 {
198         return ((vmhub == AMDGPU_MMHUB_0 ||
199                  vmhub == AMDGPU_MMHUB_1) &&
200                 (!amdgpu_sriov_vf(adev)));
201 }
202
203 static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
204                                         struct amdgpu_device *adev,
205                                         uint8_t vmid, uint16_t *p_pasid)
206 {
207         uint32_t value;
208
209         value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
210                      + vmid);
211         *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
212
213         return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
214 }
215
216 /*
217  * GART
218  * VMID 0 is the physical GPU addresses as used by the kernel.
219  * VMIDs 1-15 are used for userspace clients and are handled
220  * by the amdgpu vm/hsa code.
221  */
222
223 static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
224                                    unsigned int vmhub, uint32_t flush_type)
225 {
226         bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
227         struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
228         u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
229         u32 tmp;
230         /* Use register 17 for GART */
231         const unsigned eng = 17;
232         unsigned int i;
233
234         spin_lock(&adev->gmc.invalidate_lock);
235         /*
236          * The GPU VM invalidate acknowledge state may be lost across a
237          * power-gating off cycle. As a workaround, acquire the semaphore
238          * before the invalidation and release it afterwards, so the block
239          * does not enter the power-gated state in between.
240          */
241
242         /* TODO: Using the semaphore for GFXHUB as well still needs more debugging. */
243         if (use_semaphore) {
244                 for (i = 0; i < adev->usec_timeout; i++) {
245                         /* a read return value of 1 means the semaphore was acquired */
246                         tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem +
247                                             hub->eng_distance * eng);
248                         if (tmp & 0x1)
249                                 break;
250                         udelay(1);
251                 }
252
253                 if (i >= adev->usec_timeout)
254                         DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
255         }
256
257         WREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req);
258
259         /*
260          * Issue a dummy read to wait for the ACK register to be cleared
261          * to avoid a false ACK due to the new fast GRBM interface.
262          */
263         if ((vmhub == AMDGPU_GFXHUB_0) &&
264             (adev->asic_type < CHIP_SIENNA_CICHLID))
265                 RREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng);
266
267         /* Wait for ACK with a delay. */
268         for (i = 0; i < adev->usec_timeout; i++) {
269                 tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack +
270                                     hub->eng_distance * eng);
271                 tmp &= 1 << vmid;
272                 if (tmp)
273                         break;
274
275                 udelay(1);
276         }
277
278         /* TODO: Using the semaphore for GFXHUB as well still needs more debugging. */
279         if (use_semaphore)
280                 /*
281                  * Release the semaphore after the invalidation;
282                  * writing 0 releases it.
283                  */
284                 WREG32_NO_KIQ(hub->vm_inv_eng0_sem +
285                               hub->eng_distance * eng, 0);
286
287         spin_unlock(&adev->gmc.invalidate_lock);
288
289         if (i < adev->usec_timeout)
290                 return;
291
292         DRM_ERROR("Timeout waiting for VM flush ACK!\n");
293 }
294
295 /**
296  * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
297  *
298  * @adev: amdgpu_device pointer
299  * @vmid: vm instance to flush
300  * @vmhub: vmhub type
301  * @flush_type: the flush type
302  *
303  * Flush the TLB for the requested page table.
304  */
305 static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
306                                         uint32_t vmhub, uint32_t flush_type)
307 {
308         struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
309         struct dma_fence *fence;
310         struct amdgpu_job *job;
311
312         int r;
313
314         /* flush hdp cache */
315         adev->nbio.funcs->hdp_flush(adev, NULL);
316
317         /* At SRIOV run time the driver shouldn't access registers through MMIO.
318          * Use the KIQ to do the VM invalidation instead.
319          */
320         if (adev->gfx.kiq.ring.sched.ready &&
321             (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
322             down_read_trylock(&adev->reset_sem)) {
323                 struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
324                 const unsigned eng = 17;
325                 u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
326                 u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
327                 u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
328
329                 amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
330                                 1 << vmid);
331
332                 up_read(&adev->reset_sem);
333                 return;
334         }
335
336         mutex_lock(&adev->mman.gtt_window_lock);
337
338         if (vmhub == AMDGPU_MMHUB_0) {
339                 gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0);
340                 mutex_unlock(&adev->mman.gtt_window_lock);
341                 return;
342         }
343
344         BUG_ON(vmhub != AMDGPU_GFXHUB_0);
345
346         if (!adev->mman.buffer_funcs_enabled ||
347             !adev->ib_pool_ready ||
348             amdgpu_in_reset(adev) ||
349             ring->sched.ready == false) {
350                 gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0);
351                 mutex_unlock(&adev->mman.gtt_window_lock);
352                 return;
353         }
354
355         /* The SDMA on Navi has a bug which can theoretically result in memory
356          * corruption if an invalidation happens at the same time as a VA
357          * translation. Avoid this by doing the invalidation from the SDMA
358          * itself.
359          */
360         r = amdgpu_job_alloc_with_ib(adev, 16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
361                                      &job);
362         if (r)
363                 goto error_alloc;
364
365         job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
366         job->vm_needs_flush = true;
367         job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
368         amdgpu_ring_pad_ib(ring, &job->ibs[0]);
369         r = amdgpu_job_submit(job, &adev->mman.entity,
370                               AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
371         if (r)
372                 goto error_submit;
373
374         mutex_unlock(&adev->mman.gtt_window_lock);
375
376         dma_fence_wait(fence, false);
377         dma_fence_put(fence);
378
379         return;
380
381 error_submit:
382         amdgpu_job_free(job);
383
384 error_alloc:
385         mutex_unlock(&adev->mman.gtt_window_lock);
386         DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
387 }
388
389 /**
390  * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
391  *
392  * @adev: amdgpu_device pointer
393  * @pasid: pasid to be flushed
394  * @flush_type: the flush type
395  * @all_hub: Used with PACKET3_INVALIDATE_TLBS_ALL_HUB()
396  *
397  * Flush the TLB for the requested pasid.
398  */
399 static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
400                                         uint16_t pasid, uint32_t flush_type,
401                                         bool all_hub)
402 {
403         int vmid, i;
404         signed long r;
405         uint32_t seq;
406         uint16_t queried_pasid;
407         bool ret;
408         struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
409         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
410
411         if (amdgpu_emu_mode == 0 && ring->sched.ready) {
412                 spin_lock(&adev->gfx.kiq.ring_lock);
413                 /* 2 dwords flush + 8 dwords fence */
414                 amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
415                 kiq->pmf->kiq_invalidate_tlbs(ring,
416                                         pasid, flush_type, all_hub);
417                 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
418                 if (r) {
419                         amdgpu_ring_undo(ring);
420                         spin_unlock(&adev->gfx.kiq.ring_lock);
421                         return -ETIME;
422                 }
423
424                 amdgpu_ring_commit(ring);
425                 spin_unlock(&adev->gfx.kiq.ring_lock);
426                 r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
427                 if (r < 1) {
428                         dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
429                         return -ETIME;
430                 }
431
432                 return 0;
433         }
434
435         for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
436
437                 ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
438                                 &queried_pasid);
439                 if (ret && queried_pasid == pasid) {
440                         if (all_hub) {
441                                 for (i = 0; i < adev->num_vmhubs; i++)
442                                         gmc_v10_0_flush_gpu_tlb(adev, vmid,
443                                                         i, flush_type);
444                         } else {
445                                 gmc_v10_0_flush_gpu_tlb(adev, vmid,
446                                                 AMDGPU_GFXHUB_0, flush_type);
447                         }
448                         break;
449                 }
450         }
451
452         return 0;
453 }
454
455 static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
456                                              unsigned vmid, uint64_t pd_addr)
457 {
458         bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
459         struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
460         uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0);
461         unsigned eng = ring->vm_inv_eng;
462
463         /*
464          * The GPU VM invalidate acknowledge state may be lost across a
465          * power-gating off cycle. As a workaround, acquire the semaphore
466          * before the invalidation and release it afterwards, so the block
467          * does not enter the power-gated state in between.
468          */
469
470         /* TODO: Using the semaphore for GFXHUB as well still needs more debugging. */
471         if (use_semaphore)
472                 /* a read return value of 1 means the semaphore was acquired */
473                 amdgpu_ring_emit_reg_wait(ring,
474                                           hub->vm_inv_eng0_sem +
475                                           hub->eng_distance * eng, 0x1, 0x1);
476
477         amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 +
478                               (hub->ctx_addr_distance * vmid),
479                               lower_32_bits(pd_addr));
480
481         amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 +
482                               (hub->ctx_addr_distance * vmid),
483                               upper_32_bits(pd_addr));
484
485         amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req +
486                                             hub->eng_distance * eng,
487                                             hub->vm_inv_eng0_ack +
488                                             hub->eng_distance * eng,
489                                             req, 1 << vmid);
490
491         /* TODO: Using the semaphore for GFXHUB as well still needs more debugging. */
492         if (use_semaphore)
493                 /*
494                  * Release the semaphore after the invalidation;
495                  * writing 0 releases it.
496                  */
497                 amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem +
498                                       hub->eng_distance * eng, 0);
499
500         return pd_addr;
501 }
502
503 static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
504                                          unsigned pasid)
505 {
506         struct amdgpu_device *adev = ring->adev;
507         uint32_t reg;
508
509         if (ring->funcs->vmhub == AMDGPU_GFXHUB_0)
510                 reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
511         else
512                 reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
513
514         amdgpu_ring_emit_wreg(ring, reg, pasid);
515 }
516
517 /*
518  * PTE format on NAVI 10:
519  * 63:59 reserved
520  * 58 reserved and for sienna_cichlid is used for MALL noalloc
521  * 57 reserved
522  * 56 F
523  * 55 L
524  * 54 reserved
525  * 53:52 SW
526  * 51 T
527  * 50:48 mtype
528  * 47:12 4k physical page base address
529  * 11:7 fragment
530  * 6 write
531  * 5 read
532  * 4 exe
533  * 3 Z
534  * 2 snooped
535  * 1 system
536  * 0 valid
537  *
538  * PDE format on NAVI 10:
539  * 63:59 block fragment size
540  * 58:55 reserved
541  * 54 P
542  * 53:48 reserved
543  * 47:6 physical base address of PD or PTE
544  * 5:3 reserved
545  * 2 C
546  * 1 system
547  * 0 valid
548  */
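/*
 * Worked example of the PTE layout above (hypothetical value, not read back
 * from hardware): a PTE of 0x0000001234567061 decodes as
 *   bits 47:12 = 0x1234567 -> 4K physical page base address 0x1234567000
 *   bits 11:7  = 0         -> fragment 0 (a single 4K page)
 *   bit  6     = 1         -> writeable
 *   bit  5     = 1         -> readable
 *   bit  0     = 1         -> valid
 * with the remaining fields (exe, snooped, system, mtype, ...) left at 0.
 */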
549
550 static uint64_t gmc_v10_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
551 {
552         switch (flags) {
553         case AMDGPU_VM_MTYPE_DEFAULT:
554                 return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
555         case AMDGPU_VM_MTYPE_NC:
556                 return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
557         case AMDGPU_VM_MTYPE_WC:
558                 return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
559         case AMDGPU_VM_MTYPE_CC:
560                 return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
561         case AMDGPU_VM_MTYPE_UC:
562                 return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
563         default:
564                 return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
565         }
566 }
567
568 static void gmc_v10_0_get_vm_pde(struct amdgpu_device *adev, int level,
569                                  uint64_t *addr, uint64_t *flags)
570 {
571         if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM))
572                 *addr = adev->vm_manager.vram_base_offset + *addr -
573                         adev->gmc.vram_start;
574         BUG_ON(*addr & 0xFFFF00000000003FULL);
575
576         if (!adev->gmc.translate_further)
577                 return;
578
579         if (level == AMDGPU_VM_PDB1) {
580                 /* Set the block fragment size */
581                 if (!(*flags & AMDGPU_PDE_PTE))
582                         *flags |= AMDGPU_PDE_BFS(0x9);
583
584         } else if (level == AMDGPU_VM_PDB0) {
585                 if (*flags & AMDGPU_PDE_PTE)
586                         *flags &= ~AMDGPU_PDE_PTE;
587                 else
588                         *flags |= AMDGPU_PTE_TF;
589         }
590 }
591
592 static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
593                                  struct amdgpu_bo_va_mapping *mapping,
594                                  uint64_t *flags)
595 {
596         *flags &= ~AMDGPU_PTE_EXECUTABLE;
597         *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
598
599         *flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK;
600         *flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK);
601
602         if (mapping->flags & AMDGPU_PTE_PRT) {
603                 *flags |= AMDGPU_PTE_PRT;
604                 *flags |= AMDGPU_PTE_SNOOPED;
605                 *flags |= AMDGPU_PTE_LOG;
606                 *flags |= AMDGPU_PTE_SYSTEM;
607                 *flags &= ~AMDGPU_PTE_VALID;
608         }
609 }
610
611 static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev)
612 {
613         u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL);
614         unsigned size;
615
616         if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) {
617                 size = AMDGPU_VBIOS_VGA_ALLOCATION;
618         } else {
619                 u32 viewport;
620                 u32 pitch;
621
622                 viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION);
623                 pitch = RREG32_SOC15(DCE, 0, mmHUBPREQ0_DCSURF_SURFACE_PITCH);
624                 size = (REG_GET_FIELD(viewport,
625                                         HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) *
626                                 REG_GET_FIELD(pitch, HUBPREQ0_DCSURF_SURFACE_PITCH, PITCH) *
627                                 4);
628         }
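        /*
         * For instance (hypothetical mode, assuming 4 bytes per pixel): a
         * 1920x1080 scanout with a 1920-pixel pitch reserves
         * 1080 * 1920 * 4 = 8294400 bytes (about 8 MB) for the VBIOS framebuffer.
         */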
629
630         return size;
631 }
632
633 static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
634         .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
635         .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
636         .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
637         .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
638         .map_mtype = gmc_v10_0_map_mtype,
639         .get_vm_pde = gmc_v10_0_get_vm_pde,
640         .get_vm_pte = gmc_v10_0_get_vm_pte,
641         .get_vbios_fb_size = gmc_v10_0_get_vbios_fb_size,
642 };
643
644 static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev)
645 {
646         if (adev->gmc.gmc_funcs == NULL)
647                 adev->gmc.gmc_funcs = &gmc_v10_0_gmc_funcs;
648 }
649
650 static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev)
651 {
652         switch (adev->asic_type) {
653         case CHIP_SIENNA_CICHLID:
654                 adev->umc.max_ras_err_cnt_per_query = UMC_V8_7_TOTAL_CHANNEL_NUM;
655                 adev->umc.channel_inst_num = UMC_V8_7_CHANNEL_INSTANCE_NUM;
656                 adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM;
657                 adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA;
658                 adev->umc.channel_idx_tbl = &umc_v8_7_channel_idx_tbl[0][0];
659                 adev->umc.funcs = &umc_v8_7_funcs;
660                 break;
661         default:
662                 break;
663         }
664 }
665
666
667 static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev)
668 {
669         switch (adev->asic_type) {
670         case CHIP_VANGOGH:
671                 adev->mmhub.funcs = &mmhub_v2_3_funcs;
672                 break;
673         default:
674                 adev->mmhub.funcs = &mmhub_v2_0_funcs;
675                 break;
676         }
677 }
678
679 static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev)
680 {
681         switch (adev->asic_type) {
682         case CHIP_SIENNA_CICHLID:
683         case CHIP_NAVY_FLOUNDER:
684         case CHIP_VANGOGH:
685         case CHIP_DIMGREY_CAVEFISH:
686                 adev->gfxhub.funcs = &gfxhub_v2_1_funcs;
687                 break;
688         default:
689                 adev->gfxhub.funcs = &gfxhub_v2_0_funcs;
690                 break;
691         }
692 }
693
694
695 static int gmc_v10_0_early_init(void *handle)
696 {
697         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
698
699         gmc_v10_0_set_mmhub_funcs(adev);
700         gmc_v10_0_set_gfxhub_funcs(adev);
701         gmc_v10_0_set_gmc_funcs(adev);
702         gmc_v10_0_set_irq_funcs(adev);
703         gmc_v10_0_set_umc_funcs(adev);
704
705         adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
706         adev->gmc.shared_aperture_end =
707                 adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
708         adev->gmc.private_aperture_start = 0x1000000000000000ULL;
709         adev->gmc.private_aperture_end =
710                 adev->gmc.private_aperture_start + (4ULL << 30) - 1;
711
712         return 0;
713 }
714
715 static int gmc_v10_0_late_init(void *handle)
716 {
717         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
718         int r;
719
720         r = amdgpu_gmc_allocate_vm_inv_eng(adev);
721         if (r)
722                 return r;
723
724         r = amdgpu_gmc_ras_late_init(adev);
725         if (r)
726                 return r;
727
728         return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
729 }
730
731 static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev,
732                                         struct amdgpu_gmc *mc)
733 {
734         u64 base = 0;
735
736         base = adev->gfxhub.funcs->get_fb_location(adev);
737
738         /* add the xgmi offset of the physical node */
739         base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
740
741         amdgpu_gmc_vram_location(adev, &adev->gmc, base);
742         amdgpu_gmc_gart_location(adev, mc);
743         amdgpu_gmc_agp_location(adev, mc);
744
745         /* base offset of vram pages */
746         adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev);
747
748         /* add the xgmi offset of the physical node */
749         adev->vm_manager.vram_base_offset +=
750                 adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size;
751 }
752
753 /**
754  * gmc_v10_0_mc_init - initialize the memory controller driver params
755  *
756  * @adev: amdgpu_device pointer
757  *
758  * Look up the amount of vram, vram width, and decide how to place
759  * vram and gart within the GPU's physical address space.
760  * Returns 0 for success.
761  */
762 static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
763 {
764         int r;
765
766         /* VRAM size is reported in MB by the NBIO; convert it to bytes */
767         adev->gmc.mc_vram_size =
768                 adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
769         adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
770
771         if (!(adev->flags & AMD_IS_APU)) {
772                 r = amdgpu_device_resize_fb_bar(adev);
773                 if (r)
774                         return r;
775         }
776         adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
777         adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
778
779 #ifdef CONFIG_X86_64
780         if (adev->flags & AMD_IS_APU) {
781                 adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev);
782                 adev->gmc.aper_size = adev->gmc.real_vram_size;
783         }
784 #endif
785
786         /* In case the PCI BAR is larger than the actual amount of vram */
787         adev->gmc.visible_vram_size = adev->gmc.aper_size;
788         if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
789                 adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
790
791         /* set the gart size */
792         if (amdgpu_gart_size == -1) {
793                 switch (adev->asic_type) {
794                 case CHIP_NAVI10:
795                 case CHIP_NAVI14:
796                 case CHIP_NAVI12:
797                 case CHIP_SIENNA_CICHLID:
798                 case CHIP_NAVY_FLOUNDER:
799                 case CHIP_VANGOGH:
800                 case CHIP_DIMGREY_CAVEFISH:
801                 default:
802                         adev->gmc.gart_size = 512ULL << 20;
803                         break;
804                 }
805         } else
806                 adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
807
808         gmc_v10_0_vram_gtt_location(adev, &adev->gmc);
809
810         return 0;
811 }
812
813 static int gmc_v10_0_gart_init(struct amdgpu_device *adev)
814 {
815         int r;
816
817         if (adev->gart.bo) {
818                 WARN(1, "NAVI10 PCIE GART already initialized\n");
819                 return 0;
820         }
821
822         /* Initialize common gart structure */
823         r = amdgpu_gart_init(adev);
824         if (r)
825                 return r;
826
827         adev->gart.table_size = adev->gart.num_gpu_pages * 8;
828         adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(MTYPE_UC) |
829                                  AMDGPU_PTE_EXECUTABLE;
830
831         return amdgpu_gart_table_vram_alloc(adev);
832 }
833
834 static int gmc_v10_0_sw_init(void *handle)
835 {
836         int r, vram_width = 0, vram_type = 0, vram_vendor = 0;
837         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
838
839         adev->gfxhub.funcs->init(adev);
840
841         adev->mmhub.funcs->init(adev);
842
843         spin_lock_init(&adev->gmc.invalidate_lock);
844
845         if ((adev->flags & AMD_IS_APU) && amdgpu_emu_mode == 1) {
846                 adev->gmc.vram_type = AMDGPU_VRAM_TYPE_DDR4;
847                 adev->gmc.vram_width = 64;
848         } else if (amdgpu_emu_mode == 1) {
849                 adev->gmc.vram_type = AMDGPU_VRAM_TYPE_GDDR6;
850                 adev->gmc.vram_width = 1 * 128; /* numchan * chansize */
851         } else {
852                 r = amdgpu_atomfirmware_get_vram_info(adev,
853                                 &vram_width, &vram_type, &vram_vendor);
854                 adev->gmc.vram_width = vram_width;
855
856                 adev->gmc.vram_type = vram_type;
857                 adev->gmc.vram_vendor = vram_vendor;
858         }
859
860         switch (adev->asic_type) {
861         case CHIP_NAVI10:
862         case CHIP_NAVI14:
863         case CHIP_NAVI12:
864         case CHIP_SIENNA_CICHLID:
865         case CHIP_NAVY_FLOUNDER:
866         case CHIP_VANGOGH:
867         case CHIP_DIMGREY_CAVEFISH:
868                 adev->num_vmhubs = 2;
869                 /*
870                  * To support 4-level page tables, the VM size is 256TB
871                  * (48 bits), the maximum for Navi10/Navi14/Navi12, with a
872                  * block size of 512 (9 bits).
873                  */
874                 amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
875                 break;
876         default:
877                 break;
878         }
879
880         /* This interrupt is for VMC page faults. */
881         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC,
882                               VMC_1_0__SRCID__VM_FAULT,
883                               &adev->gmc.vm_fault);
884
885         if (r)
886                 return r;
887
888         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2,
889                               UTCL2_1_0__SRCID__FAULT,
890                               &adev->gmc.vm_fault);
891         if (r)
892                 return r;
893
894         if (!amdgpu_sriov_vf(adev)) {
895                 /* ECC error interrupt is sent to the DF. */
896                 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0,
897                                       &adev->gmc.ecc_irq);
898                 if (r)
899                         return r;
900         }
901
902         /*
903          * Set the internal MC address mask. This is the max address of the GPU's
904          * internal address space.
905          */
906         adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
907
908         r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44));
909         if (r) {
910                 printk(KERN_WARNING "amdgpu: No suitable DMA available.\n");
911                 return r;
912         }
913
914         if (adev->gmc.xgmi.supported) {
915                 r = adev->gfxhub.funcs->get_xgmi_info(adev);
916                 if (r)
917                         return r;
918         }
919
920         r = gmc_v10_0_mc_init(adev);
921         if (r)
922                 return r;
923
924         amdgpu_gmc_get_vbios_allocations(adev);
925
926         /* Memory manager */
927         r = amdgpu_bo_init(adev);
928         if (r)
929                 return r;
930
931         r = gmc_v10_0_gart_init(adev);
932         if (r)
933                 return r;
934
935         /*
936          * number of VMs
937          * VMID 0 is reserved for System
938          * amdgpu graphics/compute will use VMIDs 1-7
939          * amdkfd will use VMIDs 8-15
940          */
941         adev->vm_manager.first_kfd_vmid = 8;
942
943         amdgpu_vm_manager_init(adev);
944
945         return 0;
946 }
947
948 /**
949  * gmc_v10_0_gart_fini - vm fini callback
950  *
951  * @adev: amdgpu_device pointer
952  *
953  * Tears down the driver GART/VM setup.
954  */
955 static void gmc_v10_0_gart_fini(struct amdgpu_device *adev)
956 {
957         amdgpu_gart_table_vram_free(adev);
958         amdgpu_gart_fini(adev);
959 }
960
961 static int gmc_v10_0_sw_fini(void *handle)
962 {
963         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
964
965         amdgpu_vm_manager_fini(adev);
966         gmc_v10_0_gart_fini(adev);
967         amdgpu_gem_force_release(adev);
968         amdgpu_bo_fini(adev);
969
970         return 0;
971 }
972
973 static void gmc_v10_0_init_golden_registers(struct amdgpu_device *adev)
974 {
975         switch (adev->asic_type) {
976         case CHIP_NAVI10:
977         case CHIP_NAVI14:
978         case CHIP_NAVI12:
979         case CHIP_SIENNA_CICHLID:
980         case CHIP_NAVY_FLOUNDER:
981         case CHIP_VANGOGH:
982         case CHIP_DIMGREY_CAVEFISH:
983                 break;
984         default:
985                 break;
986         }
987 }
988
989 /**
990  * gmc_v10_0_gart_enable - gart enable
991  *
992  * @adev: amdgpu_device pointer
993  */
994 static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
995 {
996         int r;
997         bool value;
998         u32 tmp;
999
1000         if (adev->gart.bo == NULL) {
1001                 dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
1002                 return -EINVAL;
1003         }
1004
1005         r = amdgpu_gart_table_vram_pin(adev);
1006         if (r)
1007                 return r;
1008
1009         r = adev->gfxhub.funcs->gart_enable(adev);
1010         if (r)
1011                 return r;
1012
1013         r = adev->mmhub.funcs->gart_enable(adev);
1014         if (r)
1015                 return r;
1016
1017         tmp = RREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL);
1018         tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK;
1019         WREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL, tmp);
1020
1021         tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL);
1022         WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
1023
1024         /* Flush HDP after it is initialized */
1025         adev->nbio.funcs->hdp_flush(adev, NULL);
1026
1027         value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
1028                 false : true;
1029
1030         adev->gfxhub.funcs->set_fault_enable_default(adev, value);
1031         adev->mmhub.funcs->set_fault_enable_default(adev, value);
1032         gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0);
1033         gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
1034
1035         DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
1036                  (unsigned)(adev->gmc.gart_size >> 20),
1037                  (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo));
1038
1039         adev->gart.ready = true;
1040
1041         return 0;
1042 }
1043
1044 static int gmc_v10_0_hw_init(void *handle)
1045 {
1046         int r;
1047         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1048
1049         /* The sequence of these two function calls matters. */
1050         gmc_v10_0_init_golden_registers(adev);
1051
1052         r = gmc_v10_0_gart_enable(adev);
1053         if (r)
1054                 return r;
1055
1056         if (adev->umc.funcs && adev->umc.funcs->init_registers)
1057                 adev->umc.funcs->init_registers(adev);
1058
1059         return 0;
1060 }
1061
1062 /**
1063  * gmc_v10_0_gart_disable - gart disable
1064  *
1065  * @adev: amdgpu_device pointer
1066  *
1067  * This disables all VM page tables.
1068  */
1069 static void gmc_v10_0_gart_disable(struct amdgpu_device *adev)
1070 {
1071         adev->gfxhub.funcs->gart_disable(adev);
1072         adev->mmhub.funcs->gart_disable(adev);
1073         amdgpu_gart_table_vram_unpin(adev);
1074 }
1075
1076 static int gmc_v10_0_hw_fini(void *handle)
1077 {
1078         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1079
1080         if (amdgpu_sriov_vf(adev)) {
1081                 /* full access mode, so don't touch any GMC register */
1082                 DRM_DEBUG("For SRIOV client, shouldn't do anything.\n");
1083                 return 0;
1084         }
1085
1086         amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0);
1087         amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
1088         gmc_v10_0_gart_disable(adev);
1089
1090         return 0;
1091 }
1092
1093 static int gmc_v10_0_suspend(void *handle)
1094 {
1095         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1096
1097         gmc_v10_0_hw_fini(adev);
1098
1099         return 0;
1100 }
1101
1102 static int gmc_v10_0_resume(void *handle)
1103 {
1104         int r;
1105         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1106
1107         r = gmc_v10_0_hw_init(adev);
1108         if (r)
1109                 return r;
1110
1111         amdgpu_vmid_reset_all(adev);
1112
1113         return 0;
1114 }
1115
1116 static bool gmc_v10_0_is_idle(void *handle)
1117 {
1118         /* MC is always ready in GMC v10. */
1119         return true;
1120 }
1121
1122 static int gmc_v10_0_wait_for_idle(void *handle)
1123 {
1124         /* There is no need to wait for MC idle in GMC v10. */
1125         return 0;
1126 }
1127
1128 static int gmc_v10_0_soft_reset(void *handle)
1129 {
1130         return 0;
1131 }
1132
1133 static int gmc_v10_0_set_clockgating_state(void *handle,
1134                                            enum amd_clockgating_state state)
1135 {
1136         int r;
1137         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1138
1139         r = adev->mmhub.funcs->set_clockgating(adev, state);
1140         if (r)
1141                 return r;
1142
1143         if (adev->asic_type >= CHIP_SIENNA_CICHLID &&
1144             adev->asic_type <= CHIP_DIMGREY_CAVEFISH)
1145                 return athub_v2_1_set_clockgating(adev, state);
1146         else
1147                 return athub_v2_0_set_clockgating(adev, state);
1148 }
1149
1150 static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags)
1151 {
1152         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1153
1154         adev->mmhub.funcs->get_clockgating(adev, flags);
1155
1156         if (adev->asic_type >= CHIP_SIENNA_CICHLID &&
1157             adev->asic_type <= CHIP_DIMGREY_CAVEFISH)
1158                 athub_v2_1_get_clockgating(adev, flags);
1159         else
1160                 athub_v2_0_get_clockgating(adev, flags);
1161 }
1162
1163 static int gmc_v10_0_set_powergating_state(void *handle,
1164                                            enum amd_powergating_state state)
1165 {
1166         return 0;
1167 }
1168
1169 const struct amd_ip_funcs gmc_v10_0_ip_funcs = {
1170         .name = "gmc_v10_0",
1171         .early_init = gmc_v10_0_early_init,
1172         .late_init = gmc_v10_0_late_init,
1173         .sw_init = gmc_v10_0_sw_init,
1174         .sw_fini = gmc_v10_0_sw_fini,
1175         .hw_init = gmc_v10_0_hw_init,
1176         .hw_fini = gmc_v10_0_hw_fini,
1177         .suspend = gmc_v10_0_suspend,
1178         .resume = gmc_v10_0_resume,
1179         .is_idle = gmc_v10_0_is_idle,
1180         .wait_for_idle = gmc_v10_0_wait_for_idle,
1181         .soft_reset = gmc_v10_0_soft_reset,
1182         .set_clockgating_state = gmc_v10_0_set_clockgating_state,
1183         .set_powergating_state = gmc_v10_0_set_powergating_state,
1184         .get_clockgating_state = gmc_v10_0_get_clockgating_state,
1185 };
1186
1187 const struct amdgpu_ip_block_version gmc_v10_0_ip_block =
1188 {
1189         .type = AMD_IP_BLOCK_TYPE_GMC,
1190         .major = 10,
1191         .minor = 0,
1192         .rev = 0,
1193         .funcs = &gmc_v10_0_ip_funcs,
1194 };