drm/amdgpu: Refactor amdgpu_move_blit

author Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>

Tue, 3 Oct 2017 19:41:56 +0000 (15:41 -0400)

committer Alex Deucher <alexander.deucher@amd.com>

Thu, 19 Oct 2017 19:27:07 +0000 (15:27 -0400)
author Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Tue, 3 Oct 2017 19:41:56 +0000 (15:41 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 19 Oct 2017 19:27:07 +0000 (15:27 -0400)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c

index a2282ba..382c0ae 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -290,97 +290,168 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
         return addr;
  }
  
-static int amdgpu_move_blit(struct ttm_buffer_object *bo,
-                           bool evict, bool no_wait_gpu,
-                           struct ttm_mem_reg *new_mem,
-                           struct ttm_mem_reg *old_mem)
+/**
+ * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
+ *
+ * The function copies @size bytes from {src->mem + src->offset} to
+ * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
+ * move and different for a BO to BO copy.
+ *
+ * @f: Optional (may be NULL). Receives a reference to the last fence submitted.
+ */
+int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+                              struct amdgpu_copy_mem *src,
+                              struct amdgpu_copy_mem *dst,
+                              uint64_t size,
+                              struct reservation_object *resv,
+                              struct dma_fence **f)
  {
-       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
         struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
-
-       struct drm_mm_node *old_mm, *new_mm;
-       uint64_t old_start, old_size, new_start, new_size;
-       unsigned long num_pages;
+       struct drm_mm_node *src_mm, *dst_mm;
+       uint64_t src_node_start, dst_node_start, src_node_size,
+                dst_node_size, src_page_offset, dst_page_offset;
         struct dma_fence *fence = NULL;
-       int r;
-
-       BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
+       int r = 0; /* stays 0 when size is 0 and the copy loop never runs */
+       const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * /* upper bound, in bytes, for one loop iteration */
+                                       AMDGPU_GPU_PAGE_SIZE);
  
         if (!ring->ready) {
                 DRM_ERROR("Trying to move memory with ring turned off.\n");
                 return -EINVAL;
         }
  
-       old_mm = old_mem->mm_node;
-       old_size = old_mm->size;
-       old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
+       src_mm = src->mem->mm_node;
+       while (src->offset >= (src_mm->size << PAGE_SHIFT)) { /* skip nodes lying entirely before src->offset */
+               src->offset -= (src_mm->size << PAGE_SHIFT); /* NOTE: writes back to the caller's src->offset */
+               ++src_mm;
+       }
+       src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
+                                            src->offset;
+       src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset; /* bytes left in the current source node */
+       src_page_offset = src_node_start & (PAGE_SIZE - 1); /* byte offset of the start address within its page */
  
-       new_mm = new_mem->mm_node;
-       new_size = new_mm->size;
-       new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
+       dst_mm = dst->mem->mm_node;
+       while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) { /* skip nodes lying entirely before dst->offset */
+               dst->offset -= (dst_mm->size << PAGE_SHIFT); /* NOTE: writes back to the caller's dst->offset */
+               ++dst_mm;
+       }
+       dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
+                                            dst->offset;
+       dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset; /* bytes left in the current destination node */
+       dst_page_offset = dst_node_start & (PAGE_SIZE - 1); /* byte offset of the start address within its page */
  
-       num_pages = new_mem->num_pages;
         mutex_lock(&adev->mman.gtt_window_lock);
-       while (num_pages) {
-               unsigned long cur_pages = min(min(old_size, new_size),
-                                             (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
-               uint64_t from = old_start, to = new_start;
+
+       while (size) {
+               unsigned long cur_size; /* bytes copied by this iteration */
+               uint64_t from = src_node_start, to = dst_node_start;
                 struct dma_fence *next;
  
-               if (old_mem->mem_type == TTM_PL_TT &&
-                   !amdgpu_gtt_mgr_is_allocated(old_mem)) {
-                       r = amdgpu_map_buffer(bo, old_mem, cur_pages,
-                                             old_start, 0, ring, &from);
+               /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
+                * begins at an offset, then adjust the size accordingly
+                */
+               cur_size = min3(min(src_node_size, dst_node_size), size,
+                               GTT_MAX_BYTES);
+               if (cur_size + src_page_offset > GTT_MAX_BYTES ||
+                   cur_size + dst_page_offset > GTT_MAX_BYTES)
+                       cur_size -= max(src_page_offset, dst_page_offset); /* shrink so offset + size fits within GTT_MAX_BYTES */
+
+               /* Map only what needs to be accessed. Map src to window 0 and
+                * dst to window 1
+                */
+               if (src->mem->mem_type == TTM_PL_TT &&
+                   !amdgpu_gtt_mgr_is_allocated(src->mem)) {
+                       r = amdgpu_map_buffer(src->bo, src->mem,
+                                       PFN_UP(cur_size + src_page_offset), /* page count covering offset + size */
+                                       src_node_start, 0, ring,
+                                       &from);
                         if (r)
                                 goto error;
+                       /* Adjust the offset because amdgpu_map_buffer returns
+                        * start of mapped page
+                        */
+                       from += src_page_offset;
                 }
  
-               if (new_mem->mem_type == TTM_PL_TT &&
-                   !amdgpu_gtt_mgr_is_allocated(new_mem)) {
-                       r = amdgpu_map_buffer(bo, new_mem, cur_pages,
-                                             new_start, 1, ring, &to);
+               if (dst->mem->mem_type == TTM_PL_TT &&
+                   !amdgpu_gtt_mgr_is_allocated(dst->mem)) {
+                       r = amdgpu_map_buffer(dst->bo, dst->mem,
+                                       PFN_UP(cur_size + dst_page_offset), /* page count covering offset + size */
+                                       dst_node_start, 1, ring,
+                                       &to);
                         if (r)
                                 goto error;
+                       to += dst_page_offset; /* same adjustment as for the source mapping above */
                 }
  
-               r = amdgpu_copy_buffer(ring, from, to,
-                                      cur_pages * PAGE_SIZE,
-                                      bo->resv, &next, false, true);
+               r = amdgpu_copy_buffer(ring, from, to, cur_size,
+                                      resv, &next, false, true);
                 if (r)
                         goto error;
  
                 dma_fence_put(fence);
                 fence = next;
  
-               num_pages -= cur_pages;
-               if (!num_pages)
+               size -= cur_size;
+               if (!size)
                         break;
  
-               old_size -= cur_pages;
-               if (!old_size) {
-                       old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem);
-                       old_size = old_mm->size;
+               src_node_size -= cur_size;
+               if (!src_node_size) { /* current source node consumed: move on to the next one */
+                       src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
+                                                            src->mem);
+                       src_node_size = (src_mm->size << PAGE_SHIFT); /* NOTE(review): src_page_offset keeps its old value on this path -- confirm, or reset it for the new node */
                 } else {
-                       old_start += cur_pages * PAGE_SIZE;
+                       src_node_start += cur_size;
+                       src_page_offset = src_node_start & (PAGE_SIZE - 1);
                 }
-
-               new_size -= cur_pages;
-               if (!new_size) {
-                       new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem);
-                       new_size = new_mm->size;
+               dst_node_size -= cur_size;
+               if (!dst_node_size) { /* current destination node consumed: move on to the next one */
+                       dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
+                                                            dst->mem);
+                       dst_node_size = (dst_mm->size << PAGE_SHIFT); /* NOTE(review): dst_page_offset keeps its old value on this path -- confirm, or reset it for the new node */
                 } else {
-                       new_start += cur_pages * PAGE_SIZE;
+                       dst_node_start += cur_size;
+                       dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
                 }
         }
+error: /* reached on success as well as on failure */
         mutex_unlock(&adev->mman.gtt_window_lock);
+       if (f)
+               *f = dma_fence_get(fence); /* caller gets its own reference; fence is still NULL if nothing was submitted */
+       dma_fence_put(fence); /* drop the local reference */
+       return r;
+}
+
+
+static int amdgpu_move_blit(struct ttm_buffer_object *bo,
+                           bool evict, bool no_wait_gpu,
+                           struct ttm_mem_reg *new_mem,
+                           struct ttm_mem_reg *old_mem)
+{
+       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+       struct amdgpu_copy_mem src, dst;
+       struct dma_fence *fence = NULL;
+       int r;
+
+       src.bo = bo;
+       dst.bo = bo;
+       src.mem = old_mem;
+       dst.mem = new_mem;
+       src.offset = 0;
+       dst.offset = 0;
+
+       r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
+                                      new_mem->num_pages << PAGE_SHIFT,
+                                      bo->resv, &fence);
+       if (r)
+               goto error;
  
         r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
         dma_fence_put(fence);
         return r;
  
  error:
-       mutex_unlock(&adev->mman.gtt_window_lock);
-
         if (fence)
                 dma_fence_wait(fence, false);
         dma_fence_put(fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h

index 7abae68..abd4084 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -58,6 +58,12 @@ struct amdgpu_mman {
         struct amd_sched_entity                 entity;
  };
  
+struct amdgpu_copy_mem { /* one endpoint (source or destination) of amdgpu_ttm_copy_mem_to_mem() */
+       struct ttm_buffer_object        *bo; /* BO handed to amdgpu_mm_node_addr()/amdgpu_map_buffer() */
+       struct ttm_mem_reg              *mem; /* placement whose mm_node array is walked */
+       unsigned long                   offset; /* byte offset into mem; modified by the copy helper */
+};
+
  extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func;
  extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func;
  
@@ -72,6 +78,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
                        struct reservation_object *resv,
                        struct dma_fence **fence, bool direct_submit,
                        bool vm_needs_flush);
+int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+                              struct amdgpu_copy_mem *src, /* copy source; src->offset is modified */
+                              struct amdgpu_copy_mem *dst, /* copy destination; dst->offset is modified */
+                              uint64_t size, /* number of bytes to copy */
+                              struct reservation_object *resv,
+                              struct dma_fence **f); /* may be NULL; otherwise receives the last fence */
  int amdgpu_fill_buffer(struct amdgpu_bo *bo,
                         uint64_t src_data,
                         struct reservation_object *resv,
author	Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
	Tue, 3 Oct 2017 19:41:56 +0000 (15:41 -0400)
committer	Alex Deucher <alexander.deucher@amd.com>
	Thu, 19 Oct 2017 19:27:07 +0000 (15:27 -0400)
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c		patch \| blob \| history
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h		patch \| blob \| history