/* Don't handle scatter gather BOs */
if (bo->type == ttm_bo_type_sg) {
placement->num_placement = 0;
- placement->num_busy_placement = 0;
return;
}
/* Object isn't an AMDGPU object so ignore */
if (!amdgpu_bo_is_amdgpu_bo(bo)) {
placement->placement = &placements;
- placement->busy_placement = &placements;
placement->num_placement = 1;
- placement->num_busy_placement = 1;
return;
}
abo = ttm_to_amdgpu_bo(bo);
if (abo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) {
placement->num_placement = 0;
- placement->num_busy_placement = 0;
return;
}
case AMDGPU_PL_OA:
case AMDGPU_PL_DOORBELL:
placement->num_placement = 0;
- placement->num_busy_placement = 0;
return;
case TTM_PL_VRAM:
if (!adev->mman.buffer_funcs_enabled) {
/* Move to system memory */
amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
+
} else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
!(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
- amdgpu_bo_in_cpu_visible_vram(abo)) {
+ amdgpu_res_cpu_visible(adev, bo->resource)) {
/* Try evicting to the CPU inaccessible part of VRAM
* first, but only set GTT as busy placement, so this
AMDGPU_GEM_DOMAIN_CPU);
abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
abo->placements[0].lpfn = 0;
- abo->placement.busy_placement = &abo->placements[1];
- abo->placement.num_busy_placement = 1;
+ abo->placements[0].flags |= TTM_PL_FLAG_DESIRED;
} else {
/* Move to GTT memory */
amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT |
dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
- dst_addr, num_bytes, false);
+ dst_addr, num_bytes, 0);
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
struct amdgpu_res_cursor src_mm, dst_mm;
struct dma_fence *fence = NULL;
int r = 0;
+ uint32_t copy_flags = 0;
+ struct amdgpu_bo *abo_src, *abo_dst;
if (!adev->mman.buffer_funcs_enabled) {
DRM_ERROR("Trying to move memory with ring turned off.\n");
mutex_lock(&adev->mman.gtt_window_lock);
while (src_mm.remaining) {
- uint64_t from, to, cur_size;
+ uint64_t from, to, cur_size, tiling_flags;
+ uint32_t num_type, data_format, max_com;
struct dma_fence *next;
/* Never copy more than 256MiB at once to avoid a timeout */
if (r)
goto error;
- r = amdgpu_copy_buffer(ring, from, to, cur_size,
- resv, &next, false, true, tmz);
+ abo_src = ttm_to_amdgpu_bo(src->bo);
+ abo_dst = ttm_to_amdgpu_bo(dst->bo);
+ if (tmz)
+ copy_flags |= AMDGPU_COPY_FLAGS_TMZ;
+ if ((abo_src->flags & AMDGPU_GEM_CREATE_GFX12_DCC) &&
+ (abo_src->tbo.resource->mem_type == TTM_PL_VRAM))
+ copy_flags |= AMDGPU_COPY_FLAGS_READ_DECOMPRESSED;
+ if ((abo_dst->flags & AMDGPU_GEM_CREATE_GFX12_DCC) &&
+ (dst->mem->mem_type == TTM_PL_VRAM)) {
+ copy_flags |= AMDGPU_COPY_FLAGS_WRITE_COMPRESSED;
+ amdgpu_bo_get_tiling_flags(abo_dst, &tiling_flags);
+ max_com = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK);
+ num_type = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE);
+ data_format = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT);
+ copy_flags |= (AMDGPU_COPY_FLAGS_SET(MAX_COMPRESSED, max_com) |
+ AMDGPU_COPY_FLAGS_SET(NUMBER_TYPE, num_type) |
+ AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format));
+ }
+
+ r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
+ &next, false, true, copy_flags);
if (r)
goto error;
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL;
- r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
- false);
+ r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
+ false);
if (r) {
goto error;
} else if (wipe_fence) {
+ amdgpu_vram_mgr_set_cleared(bo->resource);
dma_fence_put(fence);
fence = wipe_fence;
}
return r;
}
-/*
- * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
+/**
+ * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU
+ * @adev: amdgpu device
+ * @res: the resource to check
*
- * Called by amdgpu_bo_move()
+ * Returns: true if the full resource is CPU visible, false otherwise.
*/
-static bool amdgpu_mem_visible(struct amdgpu_device *adev,
- struct ttm_resource *mem)
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+ struct ttm_resource *res)
{
- u64 mem_size = (u64)mem->size;
struct amdgpu_res_cursor cursor;
- u64 end;
- if (mem->mem_type == TTM_PL_SYSTEM ||
- mem->mem_type == TTM_PL_TT)
+ if (!res)
+ return false;
+
+ if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
+ res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL)
return true;
- if (mem->mem_type != TTM_PL_VRAM)
+
+ if (res->mem_type != TTM_PL_VRAM)
return false;
- amdgpu_res_first(mem, 0, mem_size, &cursor);
- end = cursor.start + cursor.size;
+ amdgpu_res_first(res, 0, res->size, &cursor);
while (cursor.remaining) {
+ if ((cursor.start + cursor.size) > adev->gmc.visible_vram_size)
+ return false;
amdgpu_res_next(&cursor, cursor.size);
+ }
- if (!cursor.remaining)
- break;
+ return true;
+}
- /* ttm_resource_ioremap only supports contiguous memory */
- if (end != cursor.start)
- return false;
+/*
+ * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy
+ *
+ * Called by amdgpu_bo_move()
+ */
+static bool amdgpu_res_copyable(struct amdgpu_device *adev,
+ struct ttm_resource *mem)
+{
+ if (!amdgpu_res_cpu_visible(adev, mem))
+ return false;
- end = cursor.start + cursor.size;
- }
+ /* ttm_resource_ioremap only supports contiguous memory */
+ if (mem->mem_type == TTM_PL_VRAM &&
+ !(mem->placement & TTM_PL_FLAG_CONTIGUOUS))
+ return false;
- return end <= adev->gmc.visible_vram_size;
+ return true;
}
/*
if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
bo->ttm == NULL)) {
+ amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_bo_move_null(bo, new_mem);
- goto out;
+ return 0;
}
if (old_mem->mem_type == TTM_PL_SYSTEM &&
(new_mem->mem_type == TTM_PL_TT ||
new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
+ amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_bo_move_null(bo, new_mem);
- goto out;
+ return 0;
}
if ((old_mem->mem_type == TTM_PL_TT ||
old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
return r;
amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
+ amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_resource_free(bo, &bo->resource);
ttm_bo_assign_mem(bo, new_mem);
- goto out;
+ return 0;
}
if (old_mem->mem_type == AMDGPU_PL_GDS ||
new_mem->mem_type == AMDGPU_PL_OA ||
new_mem->mem_type == AMDGPU_PL_DOORBELL) {
/* Nothing to save here */
+ amdgpu_bo_move_notify(bo, evict, new_mem);
ttm_bo_move_null(bo, new_mem);
- goto out;
+ return 0;
}
if (bo->type == ttm_bo_type_device &&
abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
}
- if (adev->mman.buffer_funcs_enabled) {
- if (((old_mem->mem_type == TTM_PL_SYSTEM &&
- new_mem->mem_type == TTM_PL_VRAM) ||
- (old_mem->mem_type == TTM_PL_VRAM &&
- new_mem->mem_type == TTM_PL_SYSTEM))) {
- hop->fpfn = 0;
- hop->lpfn = 0;
- hop->mem_type = TTM_PL_TT;
- hop->flags = TTM_PL_FLAG_TEMPORARY;
- return -EMULTIHOP;
- }
+ if (adev->mman.buffer_funcs_enabled &&
+ ((old_mem->mem_type == TTM_PL_SYSTEM &&
+ new_mem->mem_type == TTM_PL_VRAM) ||
+ (old_mem->mem_type == TTM_PL_VRAM &&
+ new_mem->mem_type == TTM_PL_SYSTEM))) {
+ hop->fpfn = 0;
+ hop->lpfn = 0;
+ hop->mem_type = TTM_PL_TT;
+ hop->flags = TTM_PL_FLAG_TEMPORARY;
+ return -EMULTIHOP;
+ }
+ amdgpu_bo_move_notify(bo, evict, new_mem);
+ if (adev->mman.buffer_funcs_enabled)
r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
- } else {
+ else
r = -ENODEV;
- }
if (r) {
/* Check that all memory is CPU accessible */
- if (!amdgpu_mem_visible(adev, old_mem) ||
- !amdgpu_mem_visible(adev, new_mem)) {
+ if (!amdgpu_res_copyable(adev, old_mem) ||
+ !amdgpu_res_copyable(adev, new_mem)) {
pr_err("Move buffer fallback to memcpy unavailable\n");
return r;
}
return r;
}
- trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
-out:
- /* update statistics */
+ /* update statistics after the move */
+ if (evict)
+ atomic64_inc(&adev->num_evictions);
atomic64_add(bo->base.size, &adev->num_bytes_moved);
- amdgpu_bo_move_notify(bo, evict);
return 0;
}
struct ttm_resource *mem)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
- size_t bus_size = (size_t)mem->size;
switch (mem->mem_type) {
case TTM_PL_SYSTEM:
break;
case TTM_PL_VRAM:
mem->bus.offset = mem->start << PAGE_SHIFT;
- /* check if it's visible */
- if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
- return -EINVAL;
if (adev->mman.aper_base_kaddr &&
mem->placement & TTM_PL_FLAG_CONTIGUOUS)
int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
uint64_t page_idx, pages_per_xcc;
int i;
- uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
- AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
+ uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
pages_per_xcc = total_pages;
do_div(pages_per_xcc, num_xcc);
amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
gtt->ttm.dma_address, flags);
}
+ gtt->bound = true;
}
/*
/* allocate GART space */
placement.num_placement = 1;
placement.placement = &placements;
- placement.num_busy_placement = 1;
- placement.busy_placement = &placements;
placements.fpfn = 0;
placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
placements.mem_type = TTM_PL_TT;
*/
dma_resv_for_each_fence(&resv_cursor, bo->base.resv,
DMA_RESV_USAGE_BOOKKEEP, f) {
- if (amdkfd_fence_check_mm(f, current->mm))
+ if (amdkfd_fence_check_mm(f, current->mm) &&
+ !(place->flags & TTM_PL_FLAG_CONTIGUOUS))
return false;
}
swap(src_addr, dst_addr);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
- PAGE_SIZE, false);
+ PAGE_SIZE, 0);
amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
static void
amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
{
- amdgpu_bo_move_notify(bo, false);
+ amdgpu_bo_move_notify(bo, false, NULL);
}
static struct ttm_device_funcs amdgpu_bo_driver = {
amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
if (!adev->bios &&
- amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3))
+ (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
+ amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)))
reserve_size = max(reserve_size, (uint32_t)280 << 20);
else if (!reserve_size)
reserve_size = DISCOVERY_TMR_OFFSET;
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
- bool vm_needs_flush, bool tmz)
+ bool vm_needs_flush, uint32_t copy_flags)
{
struct amdgpu_device *adev = ring->adev;
unsigned int num_loops, num_dw;
uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
- dst_offset, cur_size_in_bytes, tmz);
-
+ dst_offset, cur_size_in_bytes, copy_flags);
src_offset += cur_size_in_bytes;
dst_offset += cur_size_in_bytes;
byte_count -= cur_size_in_bytes;
return 0;
}
+/**
+ * amdgpu_ttm_clear_buffer - clear memory buffers
+ * @bo: amdgpu buffer object
+ * @resv: reservation object
+ * @fence: dma_fence associated with the operation
+ *
+ * Clear the memory buffer resource.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+ struct dma_resv *resv,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct amdgpu_res_cursor cursor;
+ u64 addr;
+ int r;
+
+ if (!adev->mman.buffer_funcs_enabled)
+ return -EINVAL;
+
+ if (!fence)
+ return -EINVAL;
+
+ *fence = dma_fence_get_stub();
+
+ amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+ while (cursor.remaining) {
+ struct dma_fence *next = NULL;
+ u64 size;
+
+ if (amdgpu_res_cleared(&cursor)) {
+ amdgpu_res_next(&cursor, cursor.size);
+ continue;
+ }
+
+ /* Never clear more than 256MiB at once to avoid timeouts */
+ size = min(cursor.size, 256ULL << 20);
+
+ r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
+ 1, ring, false, &size, &addr);
+ if (r)
+ goto err;
+
+ r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
+ &next, true, true);
+ if (r)
+ goto err;
+
+ dma_fence_put(*fence);
+ *fence = next;
+
+ amdgpu_res_next(&cursor, size);
+ }
+err:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+
+ return r;
+}
+
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data,
struct dma_resv *resv,