We have three ib pools, they are normal, VM, direct pools.
Any jobs which schedule IBs without dependence on gpu scheduler should
use DIRECT pool.
Any jobs schedule direct VM update IBs should use VM pool.
Any other jobs use NORMAL pool.
v2: squash in coding style fix
Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
int amdgpu_fence_slab_init(void);
void amdgpu_fence_slab_fini(void);
+enum amdgpu_ib_pool_type {
+ AMDGPU_IB_POOL_NORMAL = 0,
+ AMDGPU_IB_POOL_VM,
+ AMDGPU_IB_POOL_DIRECT,
+
+ AMDGPU_IB_POOL_MAX
+};
/*
* IRQS.
*/
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- unsigned size, struct amdgpu_ib *ib);
+ unsigned size,
+ enum amdgpu_ib_pool_type pool,
+ struct amdgpu_ib *ib);
void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
struct dma_fence *f);
int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
unsigned num_rings;
struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
bool ib_pool_ready;
- struct amdgpu_sa_manager ring_tmp_bo;
+ struct amdgpu_sa_manager ring_tmp_bo[AMDGPU_IB_POOL_MAX];
/* interrupts */
struct amdgpu_irq irq;
ring = to_amdgpu_ring(entity->rq->sched);
r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
- chunk_ib->ib_bytes : 0, ib);
+ chunk_ib->ib_bytes : 0, AMDGPU_IB_POOL_NORMAL, ib);
if (r) {
DRM_ERROR("Failed to get ib !\n");
return r;
* Returns 0 on success, error on failure.
*/
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- unsigned size, struct amdgpu_ib *ib)
+ unsigned size,
+ enum amdgpu_ib_pool_type pool_type,
+ struct amdgpu_ib *ib)
{
int r;
if (size) {
- r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
+ r = amdgpu_sa_bo_new(&adev->ring_tmp_bo[pool_type],
&ib->sa_bo, size, 256);
if (r) {
dev_err(adev->dev, "failed to get a new IB (%d)\n", r);
*/
int amdgpu_ib_pool_init(struct amdgpu_device *adev)
{
- int r;
+ int r, i;
+ unsigned size;
if (adev->ib_pool_ready) {
return 0;
}
- r = amdgpu_sa_bo_manager_init(adev, &adev->ring_tmp_bo,
- AMDGPU_IB_POOL_SIZE*64*1024,
- AMDGPU_GPU_PAGE_SIZE,
- AMDGPU_GEM_DOMAIN_GTT);
- if (r) {
- return r;
+ for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
+ if (i == AMDGPU_IB_POOL_DIRECT)
+ size = PAGE_SIZE * 2;
+ else
+ size = AMDGPU_IB_POOL_SIZE*64*1024;
+ r = amdgpu_sa_bo_manager_init(adev, &adev->ring_tmp_bo[i],
+ size,
+ AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT);
+ if (r) {
+ for (i--; i >= 0; i--)
+ amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo[i]);
+ return r;
+ }
}
-
adev->ib_pool_ready = true;
return 0;
*/
void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
{
+ int i;
+
if (adev->ib_pool_ready) {
- amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo);
+ for (i = 0; i < AMDGPU_IB_POOL_MAX; i++)
+ amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo[i]);
adev->ib_pool_ready = false;
}
}
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
- amdgpu_sa_bo_dump_debug_info(&adev->ring_tmp_bo, m);
+ seq_printf(m, "-------------------- NORMAL -------------------- \n");
+ amdgpu_sa_bo_dump_debug_info(&adev->ring_tmp_bo[AMDGPU_IB_POOL_NORMAL], m);
+ seq_printf(m, "---------------------- VM ---------------------- \n");
+ amdgpu_sa_bo_dump_debug_info(&adev->ring_tmp_bo[AMDGPU_IB_POOL_VM], m);
+ seq_printf(m, "-------------------- DIRECT--------------------- \n");
+ amdgpu_sa_bo_dump_debug_info(&adev->ring_tmp_bo[AMDGPU_IB_POOL_DIRECT], m);
return 0;
}
int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
- struct amdgpu_job **job)
+ enum amdgpu_ib_pool_type pool_type,
+ struct amdgpu_job **job)
{
int r;
if (r)
return r;
- r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]);
+ r = amdgpu_ib_get(adev, NULL, size, pool_type, &(*job)->ibs[0]);
if (r)
kfree(*job);
#define AMDGPU_JOB_GET_VMID(job) ((job) ? (job)->vmid : 0)
struct amdgpu_fence;
+enum amdgpu_ib_pool_type;
struct amdgpu_job {
struct drm_sched_job base;
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
struct amdgpu_job **job, struct amdgpu_vm *vm);
int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
- struct amdgpu_job **job);
-
+ enum amdgpu_ib_pool_type pool, struct amdgpu_job **job);
void amdgpu_job_free_resources(struct amdgpu_job *job);
void amdgpu_job_free(struct amdgpu_job *job);
int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
const unsigned ib_size_dw = 16;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
num_bytes = num_pages * 8;
- r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, &job);
+ r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
+ AMDGPU_IB_POOL_NORMAL, &job);
if (r)
return r;
num_loops = DIV_ROUND_UP(byte_count, max_bytes);
num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);
- r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(adev, num_dw * 4,
+ direct_submit ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_NORMAL, &job);
if (r)
return r;
/* for IB padding */
num_dw += 64;
- r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_NORMAL, &job);
if (r)
return r;
goto err;
}
- r = amdgpu_job_alloc_with_ib(adev, 64, &job);
+ r = amdgpu_job_alloc_with_ib(adev, 64,
+ direct ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_NORMAL, &job);
if (r)
goto err;
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;
struct dma_fence *f = NULL;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ direct ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_NORMAL, &job);
if (r)
return r;
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(adev, 64, &job);
+ r = amdgpu_job_alloc_with_ib(adev, 64,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
goto err;
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;
unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
int r;
- r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, &p->job);
+ r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4,
+ p->direct ? AMDGPU_IB_POOL_VM : AMDGPU_IB_POOL_NORMAL, &p->job);
if (r)
return r;
ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW);
ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW);
- r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, &p->job);
+ r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4,
+ p->direct ? AMDGPU_IB_POOL_VM : AMDGPU_IB_POOL_NORMAL, &p->job);
if (r)
return r;
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err0;
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 16, &ib);
+ r = amdgpu_ib_get(adev, NULL, 16,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
WREG32(scratch, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
WREG32(scratch, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 16, &ib);
+ r = amdgpu_ib_get(adev, NULL, 16,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
/* allocate an indirect buffer to put the commands in */
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, total_size, &ib);
+ r = amdgpu_ib_get(adev, NULL, total_size,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
return r;
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 16, &ib);
+ r = amdgpu_ib_get(adev, NULL, 16,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
/* allocate an indirect buffer to put the commands in */
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, total_size, &ib);
+ r = amdgpu_ib_get(adev, NULL, total_size,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
return r;
* translation. Avoid this by doing the invalidation from the SDMA
* itself.
*/
- r = amdgpu_job_alloc_with_ib(adev, 16 * 4, &job);
+ r = amdgpu_job_alloc_with_ib(adev, 16 * 4, AMDGPU_IB_POOL_VM, &job);
if (r)
goto error_alloc;
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err0;
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err0;
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err0;
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
goto err0;
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256,
+ AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err0;
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;
uint64_t addr;
int i, r;
- r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+ AMDGPU_IB_POOL_DIRECT, &job);
if (r)
return r;