Merge branch 'address-masking'
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8722beb..b8bc7fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -133,7 +133,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 
                } else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
                           !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
-                          amdgpu_bo_in_cpu_visible_vram(abo)) {
+                          amdgpu_res_cpu_visible(adev, bo->resource)) {
 
                        /* Try evicting to the CPU inaccessible part of VRAM
                         * first, but only set GTT as busy placement, so this
@@ -236,7 +236,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
        dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
        dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
        amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
-                               dst_addr, num_bytes, false);
+                               dst_addr, num_bytes, 0);
 
        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
        WARN_ON(job->ibs[0].length_dw > num_dw);
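From this hunk on, the trailing bool tmz argument of amdgpu_emit_copy_buffer() is replaced by a 32-bit copy_flags word, so callers that previously passed false now pass 0. A minimal sketch of the flag layout this implies, with assumed bit positions; the authoritative definitions live in the amdgpu headers (amdgpu_ttm.h in current trees):

/*
 * Sketch of the copy_flags word used below. Single-bit flags sit in
 * the low bits; the GFX12 DCC parameters are packed as small fields
 * via AMDGPU_COPY_FLAGS_SET(). Bit positions here are assumptions.
 */
#include <stdint.h>

#define AMDGPU_COPY_FLAGS_TMZ                   (1u << 0)
#define AMDGPU_COPY_FLAGS_READ_DECOMPRESSED     (1u << 1)
#define AMDGPU_COPY_FLAGS_WRITE_COMPRESSED      (1u << 2)

#define AMDGPU_COPY_FLAGS_MAX_COMPRESSED_SHIFT  3
#define AMDGPU_COPY_FLAGS_MAX_COMPRESSED_MASK   0x3u
#define AMDGPU_COPY_FLAGS_NUMBER_TYPE_SHIFT     5
#define AMDGPU_COPY_FLAGS_NUMBER_TYPE_MASK      0x7u
#define AMDGPU_COPY_FLAGS_DATA_FORMAT_SHIFT     8
#define AMDGPU_COPY_FLAGS_DATA_FORMAT_MASK      0x3fu

#define AMDGPU_COPY_FLAGS_SET(field, value)                        \
        (((uint32_t)(value) & AMDGPU_COPY_FLAGS_##field##_MASK)    \
         << AMDGPU_COPY_FLAGS_##field##_SHIFT)
#define AMDGPU_COPY_FLAGS_GET(value, field)                        \
        (((uint32_t)(value) >> AMDGPU_COPY_FLAGS_##field##_SHIFT)  \
         & AMDGPU_COPY_FLAGS_##field##_MASK)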
@@ -295,6 +295,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
        struct amdgpu_res_cursor src_mm, dst_mm;
        struct dma_fence *fence = NULL;
        int r = 0;
+       uint32_t copy_flags = 0;
+       struct amdgpu_bo *abo_src, *abo_dst;
 
        if (!adev->mman.buffer_funcs_enabled) {
                DRM_ERROR("Trying to move memory with ring turned off.\n");
@@ -306,7 +308,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
 
        mutex_lock(&adev->mman.gtt_window_lock);
        while (src_mm.remaining) {
-               uint64_t from, to, cur_size;
+               uint64_t from, to, cur_size, tiling_flags;
+               uint32_t num_type, data_format, max_com;
                struct dma_fence *next;
 
                /* Never copy more than 256MiB at once to avoid a timeout */
@@ -323,8 +326,27 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
                if (r)
                        goto error;
 
-               r = amdgpu_copy_buffer(ring, from, to, cur_size,
-                                      resv, &next, false, true, tmz);
+               abo_src = ttm_to_amdgpu_bo(src->bo);
+               abo_dst = ttm_to_amdgpu_bo(dst->bo);
+               if (tmz)
+                       copy_flags |= AMDGPU_COPY_FLAGS_TMZ;
+               if ((abo_src->flags & AMDGPU_GEM_CREATE_GFX12_DCC) &&
+                   (abo_src->tbo.resource->mem_type == TTM_PL_VRAM))
+                       copy_flags |= AMDGPU_COPY_FLAGS_READ_DECOMPRESSED;
+               if ((abo_dst->flags & AMDGPU_GEM_CREATE_GFX12_DCC) &&
+                   (dst->mem->mem_type == TTM_PL_VRAM)) {
+                       copy_flags |= AMDGPU_COPY_FLAGS_WRITE_COMPRESSED;
+                       amdgpu_bo_get_tiling_flags(abo_dst, &tiling_flags);
+                       max_com = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK);
+                       num_type = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE);
+                       data_format = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT);
+                       copy_flags |= (AMDGPU_COPY_FLAGS_SET(MAX_COMPRESSED, max_com) |
+                                      AMDGPU_COPY_FLAGS_SET(NUMBER_TYPE, num_type) |
+                                      AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format));
+               }
+
+               r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
+                                      &next, false, true, copy_flags);
                if (r)
                        goto error;
 
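The DCC branch above derives three SDMA parameters from the destination BO's tiling flags and packs them beside the single-bit flags. A hypothetical userspace round-trip check of that packing, reusing the sketch macros from the earlier note:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        /* Pack a TMZ copy with illustrative DCC parameters... */
        uint32_t flags = AMDGPU_COPY_FLAGS_TMZ |
                         AMDGPU_COPY_FLAGS_SET(MAX_COMPRESSED, 1) |
                         AMDGPU_COPY_FLAGS_SET(NUMBER_TYPE, 5) |
                         AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, 0x2a);

        /* ...and verify each field survives the round trip. */
        assert(AMDGPU_COPY_FLAGS_GET(flags, MAX_COMPRESSED) == 1);
        assert(AMDGPU_COPY_FLAGS_GET(flags, NUMBER_TYPE) == 5);
        assert(AMDGPU_COPY_FLAGS_GET(flags, DATA_FORMAT) == 0x2a);
        return 0;
}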
@@ -378,11 +400,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
            (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
                struct dma_fence *wipe_fence = NULL;
 
-               r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
-                                       false);
+               r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
+                                      false);
                if (r) {
                        goto error;
                } else if (wipe_fence) {
+                       amdgpu_vram_mgr_set_cleared(bo->resource);
                        dma_fence_put(fence);
                        fence = wipe_fence;
                }
@@ -403,40 +426,55 @@ error:
        return r;
 }
 
-/*
- * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
+/**
+ * amdgpu_res_cpu_visible - Check that a resource can be accessed by the CPU
+ * @adev: amdgpu device
+ * @res: the resource to check
  *
- * Called by amdgpu_bo_move()
+ * Returns: true if the full resource is CPU visible, false otherwise.
  */
-static bool amdgpu_mem_visible(struct amdgpu_device *adev,
-                              struct ttm_resource *mem)
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+                           struct ttm_resource *res)
 {
-       u64 mem_size = (u64)mem->size;
        struct amdgpu_res_cursor cursor;
-       u64 end;
 
-       if (mem->mem_type == TTM_PL_SYSTEM ||
-           mem->mem_type == TTM_PL_TT)
+       if (!res)
+               return false;
+
+       if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
+           res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL)
                return true;
-       if (mem->mem_type != TTM_PL_VRAM)
+
+       if (res->mem_type != TTM_PL_VRAM)
                return false;
 
-       amdgpu_res_first(mem, 0, mem_size, &cursor);
-       end = cursor.start + cursor.size;
+       amdgpu_res_first(res, 0, res->size, &cursor);
        while (cursor.remaining) {
+               if ((cursor.start + cursor.size) > adev->gmc.visible_vram_size)
+                       return false;
                amdgpu_res_next(&cursor, cursor.size);
+       }
 
-               if (!cursor.remaining)
-                       break;
+       return true;
+}
 
-               /* ttm_resource_ioremap only supports contiguous memory */
-               if (end != cursor.start)
-                       return false;
+/*
+ * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy
+ *
+ * Called by amdgpu_bo_move()
+ */
+static bool amdgpu_res_copyable(struct amdgpu_device *adev,
+                               struct ttm_resource *mem)
+{
+       if (!amdgpu_res_cpu_visible(adev, mem))
+               return false;
 
-               end = cursor.start + cursor.size;
-       }
+       /* ttm_resource_ioremap only supports contiguous memory */
+       if (mem->mem_type == TTM_PL_VRAM &&
+           !(mem->placement & TTM_PL_FLAG_CONTIGUOUS))
+               return false;
 
-       return end <= adev->gmc.visible_vram_size;
+       return true;
 }
 
 /*
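amdgpu_res_cpu_visible() deliberately drops the old contiguity requirement: it walks every block of the resource and only demands that each block end inside the visible-VRAM window, so scattered VRAM resources now count as visible. The ttm_resource_ioremap() constraint moves into amdgpu_res_copyable() as an explicit TTM_PL_FLAG_CONTIGUOUS test. A standalone illustration of the walk, with the cursor machinery simplified to an array of blocks:

#include <stdbool.h>
#include <stdint.h>

struct block { uint64_t start, size; };  /* simplified stand-in for the cursor */

/* Every block must end at or below the CPU-visible VRAM limit;
 * gaps between blocks are fine, unlike in the old check. */
static bool blocks_cpu_visible(const struct block *b, unsigned int n,
                               uint64_t visible_vram_size)
{
        for (unsigned int i = 0; i < n; i++)
                if (b[i].start + b[i].size > visible_vram_size)
                        return false;
        return true;
}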
@@ -466,14 +504,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 
        if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
                         bo->ttm == NULL)) {
+               amdgpu_bo_move_notify(bo, evict, new_mem);
                ttm_bo_move_null(bo, new_mem);
-               goto out;
+               return 0;
        }
        if (old_mem->mem_type == TTM_PL_SYSTEM &&
            (new_mem->mem_type == TTM_PL_TT ||
             new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
+               amdgpu_bo_move_notify(bo, evict, new_mem);
                ttm_bo_move_null(bo, new_mem);
-               goto out;
+               return 0;
        }
        if ((old_mem->mem_type == TTM_PL_TT ||
             old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
@@ -483,9 +523,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
                        return r;
 
                amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
+               amdgpu_bo_move_notify(bo, evict, new_mem);
                ttm_resource_free(bo, &bo->resource);
                ttm_bo_assign_mem(bo, new_mem);
-               goto out;
+               return 0;
        }
 
        if (old_mem->mem_type == AMDGPU_PL_GDS ||
@@ -497,8 +538,9 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
            new_mem->mem_type == AMDGPU_PL_OA ||
            new_mem->mem_type == AMDGPU_PL_DOORBELL) {
                /* Nothing to save here */
+               amdgpu_bo_move_notify(bo, evict, new_mem);
                ttm_bo_move_null(bo, new_mem);
-               goto out;
+               return 0;
        }
 
        if (bo->type == ttm_bo_type_device &&
@@ -510,27 +552,28 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
                abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
        }
 
-       if (adev->mman.buffer_funcs_enabled) {
-               if (((old_mem->mem_type == TTM_PL_SYSTEM &&
-                     new_mem->mem_type == TTM_PL_VRAM) ||
-                    (old_mem->mem_type == TTM_PL_VRAM &&
-                     new_mem->mem_type == TTM_PL_SYSTEM))) {
-                       hop->fpfn = 0;
-                       hop->lpfn = 0;
-                       hop->mem_type = TTM_PL_TT;
-                       hop->flags = TTM_PL_FLAG_TEMPORARY;
-                       return -EMULTIHOP;
-               }
+       if (adev->mman.buffer_funcs_enabled &&
+           ((old_mem->mem_type == TTM_PL_SYSTEM &&
+             new_mem->mem_type == TTM_PL_VRAM) ||
+            (old_mem->mem_type == TTM_PL_VRAM &&
+             new_mem->mem_type == TTM_PL_SYSTEM))) {
+               hop->fpfn = 0;
+               hop->lpfn = 0;
+               hop->mem_type = TTM_PL_TT;
+               hop->flags = TTM_PL_FLAG_TEMPORARY;
+               return -EMULTIHOP;
+       }
 
+       amdgpu_bo_move_notify(bo, evict, new_mem);
+       if (adev->mman.buffer_funcs_enabled)
                r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
-       } else {
+       else
                r = -ENODEV;
-       }
 
        if (r) {
                /* Check that all memory is CPU accessible */
-               if (!amdgpu_mem_visible(adev, old_mem) ||
-                   !amdgpu_mem_visible(adev, new_mem)) {
+               if (!amdgpu_res_copyable(adev, old_mem) ||
+                   !amdgpu_res_copyable(adev, new_mem)) {
                        pr_err("Move buffer fallback to memcpy unavailable\n");
                        return r;
                }
@@ -540,11 +583,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
                        return r;
        }
 
-       trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
-out:
-       /* update statistics */
+       /* update statistics after the move */
+       if (evict)
+               atomic64_inc(&adev->num_evictions);
        atomic64_add(bo->base.size, &adev->num_bytes_moved);
-       amdgpu_bo_move_notify(bo, evict);
        return 0;
 }
 
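Throughout amdgpu_bo_move() the notify call now happens before the data moves and receives the destination resource, replacing the single amdgpu_bo_move_notify(bo, evict) at the old out: label; the statistics block survives only on the blit path, with eviction accounting keyed on evict. The updated helper signature, as implied by the call sites (the definition lives in amdgpu_object.c):

/* Third parameter added so per-placement bookkeeping can inspect the
 * destination before the move; the delete path below passes NULL. */
void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
                           bool evict,
                           struct ttm_resource *new_mem);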
@@ -557,7 +599,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
                                     struct ttm_resource *mem)
 {
        struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
-       size_t bus_size = (size_t)mem->size;
 
        switch (mem->mem_type) {
        case TTM_PL_SYSTEM:
@@ -568,9 +609,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
                break;
        case TTM_PL_VRAM:
                mem->bus.offset = mem->start << PAGE_SHIFT;
-               /* check if it's visible */
-               if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
-                       return -EINVAL;
 
                if (adev->mman.aper_base_kaddr &&
                    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
@@ -824,8 +862,7 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
        int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
        uint64_t page_idx, pages_per_xcc;
        int i;
-       uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
-                       AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
+       uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
 
        pages_per_xcc = total_pages;
        do_div(pages_per_xcc, num_xcc);
@@ -864,6 +901,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
                amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
                                 gtt->ttm.dma_address, flags);
        }
+       gtt->bound = true;
 }
 
 /*
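The open-coded mask-and-or on AMDGPU_PTE_MTYPE_VG10_MASK collapses into a two-argument AMDGPU_PTE_MTYPE_VG10(flags, mtype). A sketch of what such a macro plausibly expands to, matching the removed expression; the real bit positions are in the GMC headers:

/* Assumed helper shape: clear the old MTYPE field from @flags, then
 * OR in the new @mtype. The shift value 57 is illustrative. */
#define AMDGPU_PTE_MTYPE_VG10_SHIFT(mtype)  ((uint64_t)(mtype) << 57)
#define AMDGPU_PTE_MTYPE_VG10_MASK          AMDGPU_PTE_MTYPE_VG10_SHIFT(3ULL)
#define AMDGPU_PTE_MTYPE_VG10(flags, mtype)                       \
        (((uint64_t)(flags) & ~AMDGPU_PTE_MTYPE_VG10_MASK) |      \
         AMDGPU_PTE_MTYPE_VG10_SHIFT(mtype))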
@@ -1379,7 +1417,8 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
         */
        dma_resv_for_each_fence(&resv_cursor, bo->base.resv,
                                DMA_RESV_USAGE_BOOKKEEP, f) {
-               if (amdkfd_fence_check_mm(f, current->mm))
+               if (amdkfd_fence_check_mm(f, current->mm) &&
+                   !(place->flags & TTM_PL_FLAG_CONTIGUOUS))
                        return false;
        }
 
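The KFD-fence veto above is now skipped when the requested placement demands contiguous memory: such requests must be able to evict even BOs protected by a KFD eviction fence. Read as a factored-out predicate (a restatement of the new condition, not code from the tree):

/* Eviction is vetoed only when a KFD fence belongs to the current
 * process AND the placement does not require contiguous memory. */
static bool kfd_fence_vetoes_eviction(struct dma_fence *f,
                                      struct mm_struct *mm,
                                      const struct ttm_place *place)
{
        return amdkfd_fence_check_mm(f, mm) &&
               !(place->flags & TTM_PL_FLAG_CONTIGUOUS);
}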
@@ -1476,7 +1515,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
                swap(src_addr, dst_addr);
 
        amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
-                               PAGE_SIZE, false);
+                               PAGE_SIZE, 0);
 
        amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
        WARN_ON(job->ibs[0].length_dw > num_dw);
@@ -1547,7 +1586,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 static void
 amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
 {
-       amdgpu_bo_move_notify(bo, false);
+       amdgpu_bo_move_notify(bo, false, NULL);
 }
 
 static struct ttm_device_funcs amdgpu_bo_driver = {
@@ -1720,7 +1759,8 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
                        amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
 
        if (!adev->bios &&
-           amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3))
+           (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+            amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)))
                reserve_size = max(reserve_size, (uint32_t)280 << 20);
        else if (!reserve_size)
                reserve_size = DISCOVERY_TMR_OFFSET;
@@ -2127,7 +2167,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
                       uint64_t dst_offset, uint32_t byte_count,
                       struct dma_resv *resv,
                       struct dma_fence **fence, bool direct_submit,
-                      bool vm_needs_flush, bool tmz)
+                      bool vm_needs_flush, uint32_t copy_flags)
 {
        struct amdgpu_device *adev = ring->adev;
        unsigned int num_loops, num_dw;
@@ -2153,8 +2193,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
                uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
 
                amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
-                                       dst_offset, cur_size_in_bytes, tmz);
-
+                                       dst_offset, cur_size_in_bytes, copy_flags);
                src_offset += cur_size_in_bytes;
                dst_offset += cur_size_in_bytes;
                byte_count -= cur_size_in_bytes;
@@ -2214,6 +2253,71 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
        return 0;
 }
 
+/**
+ * amdgpu_ttm_clear_buffer - clear the memory backing a buffer object
+ * @bo: amdgpu buffer object to be cleared
+ * @resv: reservation object to synchronize with
+ * @fence: resulting dma_fence of the last clear, for the caller to wait on
+ *
+ * Clears the memory backing @bo, skipping blocks already marked cleared.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+                           struct dma_resv *resv,
+                           struct dma_fence **fence)
+{
+       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+       struct amdgpu_res_cursor cursor;
+       u64 addr;
+       int r = 0;
+
+       if (!adev->mman.buffer_funcs_enabled)
+               return -EINVAL;
+
+       if (!fence)
+               return -EINVAL;
+
+       *fence = dma_fence_get_stub();
+
+       amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
+
+       mutex_lock(&adev->mman.gtt_window_lock);
+       while (cursor.remaining) {
+               struct dma_fence *next = NULL;
+               u64 size;
+
+               if (amdgpu_res_cleared(&cursor)) {
+                       amdgpu_res_next(&cursor, cursor.size);
+                       continue;
+               }
+
+               /* Never clear more than 256MiB at once to avoid timeouts */
+               size = min(cursor.size, 256ULL << 20);
+
+               r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
+                                         1, ring, false, &size, &addr);
+               if (r)
+                       goto err;
+
+               r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
+                                       &next, true, true);
+               if (r)
+                       goto err;
+
+               dma_fence_put(*fence);
+               *fence = next;
+
+               amdgpu_res_next(&cursor, size);
+       }
+err:
+       mutex_unlock(&adev->mman.gtt_window_lock);
+
+       return r;
+}
+
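The skip at the top of the loop relies on amdgpu_res_cleared(), which reports whether the allocator already handed the block back zeroed; the wipe-on-release path earlier in this diff is what marks blocks via amdgpu_vram_mgr_set_cleared(). An assumed shape for the helper (the real one lives with the res-cursor code):

/* Only VRAM blocks carry a cleared flag; anything else must be
 * filled. Field names follow struct amdgpu_res_cursor. */
static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
{
        struct drm_buddy_block *block;

        switch (cur->mem_type) {
        case TTM_PL_VRAM:
                block = cur->node;
                if (!amdgpu_vram_mgr_is_cleared(block))
                        return false;
                break;
        default:
                return false;
        }
        return true;
}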
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
                        uint32_t src_data,
                        struct dma_resv *resv,