Merge tag 'for-linus-hmm' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
[linux-2.6-microblaze.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_ttm.c
index c520290..6309ff7 100644 (file)
 #include "amdgpu_ras.h"
 #include "bif/bif_4_1_d.h"
 
+#define AMDGPU_TTM_VRAM_MAX_DW_READ    ((size_t)128) /* max dwords fetched per loop iteration in amdgpu_ttm_vram_read() */
+
 static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
                             struct ttm_mem_reg *mem, unsigned num_pages,
                             uint64_t offset, unsigned window,
                             struct amdgpu_ring *ring,
                             uint64_t *addr);
 
-static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
-static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev);
-
-static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
-{
-       return 0;
-}
-
 /**
  * amdgpu_init_mem_type - Initialize a memory manager for a specific type of
  * memory request.
@@ -973,7 +967,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
        /* Map SG to device */
        r = -ENOMEM;
        nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
-       if (nents != ttm->sg->nents)
+       if (nents == 0)
                goto release_sg;
 
        /* convert SG to linear array of pages and dma addresses */
@@ -1033,7 +1027,7 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
        int r;
 
-       if (abo->flags & AMDGPU_GEM_CREATE_MQD_GFX9) {
+       if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
                uint64_t page_idx = 1;
 
                r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
@@ -1041,7 +1035,10 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
                if (r)
                        goto gart_bind_fail;
 
-               /* Patch mtype of the second part BO */
+               /* The memory type of the first page defaults to UC. Now
+                * modify the memory type to NC from the second page of
+                * the BO onward.
+                */
                flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
                flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
 
@@ -1595,7 +1592,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 
        while (len && pos < adev->gmc.mc_vram_size) {
                uint64_t aligned_pos = pos & ~(uint64_t)3;
-               uint32_t bytes = 4 - (pos & 3);
+               uint64_t bytes = 4 - (pos & 3);
                uint32_t shift = (pos & 3) * 8;
                uint32_t mask = 0xffffffff << shift;
 
@@ -1604,20 +1601,28 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
                        bytes = len;
                }
 
-               spin_lock_irqsave(&adev->mmio_idx_lock, flags);
-               WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
-               WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
-               if (!write || mask != 0xffffffff)
-                       value = RREG32_NO_KIQ(mmMM_DATA);
-               if (write) {
-                       value &= ~mask;
-                       value |= (*(uint32_t *)buf << shift) & mask;
-                       WREG32_NO_KIQ(mmMM_DATA, value);
-               }
-               spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
-               if (!write) {
-                       value = (value & mask) >> shift;
-                       memcpy(buf, &value, bytes);
+               if (mask != 0xffffffff) {
+                       spin_lock_irqsave(&adev->mmio_idx_lock, flags);
+                       WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
+                       WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
+                       if (!write || mask != 0xffffffff)
+                               value = RREG32_NO_KIQ(mmMM_DATA);
+                       if (write) {
+                               value &= ~mask;
+                               value |= (*(uint32_t *)buf << shift) & mask;
+                               WREG32_NO_KIQ(mmMM_DATA, value);
+                       }
+                       spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+                       if (!write) {
+                               value = (value & mask) >> shift;
+                               memcpy(buf, &value, bytes);
+                       }
+               } else {
+                       bytes = (nodes->start + nodes->size) << PAGE_SHIFT;
+                       bytes = min(bytes - pos, (uint64_t)len & ~0x3ull);
+
+                       amdgpu_device_vram_access(adev, pos, (uint32_t *)buf,
+                                                 bytes, write);
                }
 
                ret += bytes;
@@ -1637,7 +1642,6 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
        .ttm_tt_create = &amdgpu_ttm_tt_create,
        .ttm_tt_populate = &amdgpu_ttm_tt_populate,
        .ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
-       .invalidate_caches = &amdgpu_invalidate_caches,
        .init_mem_type = &amdgpu_init_mem_type,
        .eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
        .evict_flags = &amdgpu_evict_flags,
@@ -1835,9 +1839,11 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
         *The reserved vram for memory training must be pinned to the specified
         *place on the VRAM, so reserve it early.
         */
-       r = amdgpu_ttm_training_reserve_vram_init(adev);
-       if (r)
-               return r;
+       if (!amdgpu_sriov_vf(adev)) {
+               r = amdgpu_ttm_training_reserve_vram_init(adev);
+               if (r)
+                       return r;
+       }
 
        /* allocate memory as required for VGA
         * This is used for VGA emulation and pre-OS scanout buffers to
@@ -1910,12 +1916,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
                return r;
        }
 
-       /* Register debugfs entries for amdgpu_ttm */
-       r = amdgpu_ttm_debugfs_init(adev);
-       if (r) {
-               DRM_ERROR("Failed to init debugfs\n");
-               return r;
-       }
        return 0;
 }
 
@@ -1937,7 +1937,6 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
        if (!adev->mman.initialized)
                return;
 
-       amdgpu_ttm_debugfs_fini(adev);
        amdgpu_ttm_training_reserve_vram_fini(adev);
        /* return the IP Discovery TMR memory back to VRAM */
        amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL);
@@ -2112,8 +2111,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
        }
        if (resv) {
                r = amdgpu_sync_resv(adev, &job->sync, resv,
-                                    AMDGPU_FENCE_OWNER_UNDEFINED,
-                                    false);
+                                    AMDGPU_SYNC_ALWAYS,
+                                    AMDGPU_FENCE_OWNER_UNDEFINED);
                if (r) {
                        DRM_ERROR("sync failed (%d).\n", r);
                        goto error_free;
@@ -2197,7 +2196,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 
        if (resv) {
                r = amdgpu_sync_resv(adev, &job->sync, resv,
-                                    AMDGPU_FENCE_OWNER_UNDEFINED, false);
+                                    AMDGPU_SYNC_ALWAYS,
+                                    AMDGPU_FENCE_OWNER_UNDEFINED);
                if (r) {
                        DRM_ERROR("sync failed (%d).\n", r);
                        goto error_free;
@@ -2278,7 +2278,6 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
 {
        struct amdgpu_device *adev = file_inode(f)->i_private;
        ssize_t result = 0;
-       int r;
 
        if (size & 0x3 || *pos & 0x3)
                return -EINVAL;
@@ -2286,27 +2285,19 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
        if (*pos >= adev->gmc.mc_vram_size)
                return -ENXIO;
 
+       size = min(size, (size_t)(adev->gmc.mc_vram_size - *pos));
        while (size) {
-               unsigned long flags;
-               uint32_t value;
-
-               if (*pos >= adev->gmc.mc_vram_size)
-                       return result;
-
-               spin_lock_irqsave(&adev->mmio_idx_lock, flags);
-               WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000);
-               WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31);
-               value = RREG32_NO_KIQ(mmMM_DATA);
-               spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+               size_t bytes = min(size, AMDGPU_TTM_VRAM_MAX_DW_READ * 4);
+               uint32_t value[AMDGPU_TTM_VRAM_MAX_DW_READ];
 
-               r = put_user(value, (uint32_t *)buf);
-               if (r)
-                       return r;
+               amdgpu_device_vram_access(adev, *pos, value, bytes, false);
+               if (copy_to_user(buf, value, bytes))
+                       return -EFAULT;
 
-               result += 4;
-               buf += 4;
-               *pos += 4;
-               size -= 4;
+               result += bytes;
+               buf += bytes;
+               *pos += bytes;
+               size -= bytes;
        }
 
        return result;
@@ -2543,7 +2534,7 @@ static const struct {
 
 #endif
 
-static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
+int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
 {
 #if defined(CONFIG_DEBUG_FS)
        unsigned count;
@@ -2578,13 +2569,3 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
        return 0;
 #endif
 }
-
-static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev)
-{
-#if defined(CONFIG_DEBUG_FS)
-       unsigned i;
-
-       for (i = 0; i < ARRAY_SIZE(ttm_debugfs_entries); i++)
-               debugfs_remove(adev->mman.debugfs_entries[i]);
-#endif
-}