/** List of vma that have a non-zero execobj.relocation_count */
struct list_head relocs;
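+ /** ww acquire context under which every object lock taken during execbuf nests */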
+ struct i915_gem_ww_ctx ww;
+
/**
* Track the most recently used object for relocations, as we
* frequently have to perform multiple relocations within the same
struct i915_request *rq;
u32 *rq_cmd;
unsigned int rq_size;
+ struct intel_gt_buffer_pool_node *pool;
} reloc_cache;
+ struct intel_gt_buffer_pool_node *reloc_pool; /** relocation pool for -EDEADLK handling */
+
u64 invalid_flags; /** Set of execobj.flags that are invalid */
u32 context_flags; /** Set of execobj.flags to insert from the ctx */
u32 batch_start_offset; /** Location within object of batch */
u32 batch_len; /** Length of batch within object */
u32 batch_flags; /** Flags composed for emit_bb_start() */
+ struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */
/**
* Indicate either the size of the hashtable used to resolve
return !eb_vma_misplaced(entry, vma, ev->flags);
}
-static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
-{
- GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
-
- if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
- __i915_vma_unpin_fence(vma);
-
- __i915_vma_unpin(vma);
-}
-
static inline void
eb_unreserve_vma(struct eb_vma *ev)
{
if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
return;
- __eb_unreserve_vma(ev->vma, ev->flags);
+ if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
+ __i915_vma_unpin_fence(ev->vma);
+
+ __i915_vma_unpin(ev->vma);
ev->flags &= ~__EXEC_OBJECT_RESERVED;
}
eb->batch = ev;
}
-
- if (eb_pin_vma(eb, entry, ev)) {
- if (entry->offset != vma->node.start) {
- entry->offset = vma->node.start | UPDATE;
- eb->args->flags |= __EXEC_HAS_RELOC;
- }
- } else {
- eb_unreserve_vma(ev);
- list_add_tail(&ev->bind_link, &eb->unbound);
- }
}
static inline int use_cpu_reloc(const struct reloc_cache *cache,
* This avoids unnecessary unbinding of later objects in order to make
* room for the earlier objects *unless* we need to defragment.
*/
-
- if (mutex_lock_interruptible(&eb->i915->drm.struct_mutex))
- return -EINTR;
-
pass = 0;
do {
list_for_each_entry(ev, &eb->unbound, bind_link) {
break;
}
if (err != -ENOSPC)
- break;
+ return err;
/* Resort *all* the objects into priority order */
INIT_LIST_HEAD(&eb->unbound);
err = i915_gem_evict_vm(eb->context->vm);
mutex_unlock(&eb->context->vm->mutex);
if (err)
- goto unlock;
+ return err;
break;
default:
- err = -ENOSPC;
- goto unlock;
+ return -ENOSPC;
}
pin_flags = PIN_USER;
} while (1);
-
-unlock:
- mutex_unlock(&eb->i915->drm.struct_mutex);
- return err;
}
static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
int err = 0;
INIT_LIST_HEAD(&eb->relocs);
- INIT_LIST_HEAD(&eb->unbound);
for (i = 0; i < eb->buffer_count; i++) {
struct i915_vma *vma;
return err;
}
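+/*
+ * Lock every object under eb->ww and try to pin the vmas in place.
+ * Anything that cannot be pinned where it currently sits is unbound and
+ * queued on eb->unbound for eb_reserve() to find it a new home. A
+ * -EDEADLK here means the ww context must be backed off and the whole
+ * pass retried.
+ */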
+static int eb_validate_vmas(struct i915_execbuffer *eb)
+{
+ unsigned int i;
+ int err;
+
+ INIT_LIST_HEAD(&eb->unbound);
+
+ for (i = 0; i < eb->buffer_count; i++) {
+ struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+ struct eb_vma *ev = &eb->vma[i];
+ struct i915_vma *vma = ev->vma;
+
+ err = i915_gem_object_lock(vma->obj, &eb->ww);
+ if (err)
+ return err;
+
+ if (eb_pin_vma(eb, entry, ev)) {
+ if (entry->offset != vma->node.start) {
+ entry->offset = vma->node.start | UPDATE;
+ eb->args->flags |= __EXEC_HAS_RELOC;
+ }
+ } else {
+ eb_unreserve_vma(ev);
+
+ list_add_tail(&ev->bind_link, &eb->unbound);
+ if (drm_mm_node_allocated(&vma->node)) {
+ err = i915_vma_unbind(vma);
+ if (err)
+ return err;
+ }
+ }
+
+ GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
+ eb_vma_misplaced(&eb->exec[i], vma, ev->flags));
+ }
+
+ if (!list_empty(&eb->unbound))
+ return eb_reserve(eb);
+
+ return 0;
+}
+
static struct eb_vma *
eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
{
}
}
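+/*
+ * Unreserve (unpin) every vma; when @final is set, also drop the vma
+ * references taken at lookup time.
+ */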
-static void eb_release_vmas(const struct i915_execbuffer *eb)
+static void eb_release_vmas(const struct i915_execbuffer *eb, bool final)
{
const unsigned int count = eb->buffer_count;
unsigned int i;
if (!vma)
break;
- eb->vma[i].vma = NULL;
-
- if (ev->flags & __EXEC_OBJECT_HAS_PIN)
- __eb_unreserve_vma(vma, ev->flags);
+ eb_unreserve_vma(ev);
- i915_vma_put(vma);
+ if (final)
+ i915_vma_put(vma);
}
}
return gen8_canonical_addr((int)reloc->delta + target->node.start);
}
+static void reloc_cache_clear(struct reloc_cache *cache)
+{
+ cache->rq = NULL;
+ cache->rq_cmd = NULL;
+ cache->pool = NULL;
+ cache->rq_size = 0;
+}
+
static void reloc_cache_init(struct reloc_cache *cache,
struct drm_i915_private *i915)
{
cache->has_fence = cache->gen < 4;
cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
cache->node.flags = 0;
- cache->rq = NULL;
- cache->rq_size = 0;
+ reloc_cache_clear(cache);
}
static inline void *unmask_page(unsigned long p)
return &i915->ggtt;
}
-static void reloc_gpu_flush(struct reloc_cache *cache)
+static void reloc_cache_put_pool(struct i915_execbuffer *eb, struct reloc_cache *cache)
+{
+ if (!cache->pool)
+ return;
+
+ /*
+ * This is a bit nasty: normally we keep objects locked until the end
+ * of execbuffer, but this request has already been submitted, so we
+ * must unlock the object before dropping the reference. Fortunately
+ * we only ever hold one pool node at a time, so this should be
+ * harmless.
+ */
+ i915_gem_ww_unlock_single(cache->pool->obj);
+ intel_gt_buffer_pool_put(cache->pool);
+ cache->pool = NULL;
+}
+
+static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cache)
{
struct drm_i915_gem_object *obj = cache->rq->batch->obj;
intel_gt_chipset_flush(cache->rq->engine->gt);
i915_request_add(cache->rq);
- cache->rq = NULL;
+ reloc_cache_put_pool(eb, cache);
+ reloc_cache_clear(cache);
+
+ eb->reloc_pool = NULL;
}
-static void reloc_cache_reset(struct reloc_cache *cache)
+static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb)
{
void *vaddr;
if (cache->rq)
- reloc_gpu_flush(cache);
+ reloc_gpu_flush(eb, cache);
if (!cache->vaddr)
return;
kunmap_atomic(vaddr);
i915_gem_object_finish_access(obj);
- i915_gem_object_unlock(obj);
} else {
struct i915_ggtt *ggtt = cache_to_ggtt(cache);
unsigned int flushes;
int err;
- err = i915_gem_object_lock_interruptible(obj, NULL);
+ err = i915_gem_object_prepare_write(obj, &flushes);
if (err)
return ERR_PTR(err);
- err = i915_gem_object_prepare_write(obj, &flushes);
- if (err) {
- i915_gem_object_unlock(obj);
- return ERR_PTR(err);
- }
-
BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
if (use_cpu_reloc(cache, obj))
return NULL;
- i915_gem_object_lock(obj, NULL);
err = i915_gem_object_set_to_gtt_domain(obj, true);
- i915_gem_object_unlock(obj);
if (err)
return ERR_PTR(err);
struct drm_i915_gem_object *obj = vma->obj;
int err;
- i915_vma_lock(vma);
+ assert_vma_held(vma);
if (obj->cache_dirty & ~obj->cache_coherent)
i915_gem_clflush_object(obj, 0);
if (err == 0)
err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
- i915_vma_unlock(vma);
-
return err;
}
unsigned int len)
{
struct reloc_cache *cache = &eb->reloc_cache;
- struct intel_gt_buffer_pool_node *pool;
+ struct intel_gt_buffer_pool_node *pool = eb->reloc_pool;
struct i915_request *rq;
struct i915_vma *batch;
u32 *cmd;
int err;
- pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE);
- if (IS_ERR(pool))
- return PTR_ERR(pool);
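+ /*
+ * Reuse a pool node stashed by an earlier -EDEADLK backoff, if any,
+ * so the retry does not have to reallocate; see err_pool below.
+ */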
+ if (!pool) {
+ pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
+ }
+ eb->reloc_pool = NULL;
+
+ err = i915_gem_object_lock(pool->obj, &eb->ww);
+ if (err)
+ goto err_pool;
cmd = i915_gem_object_pin_map(pool->obj,
cache->has_llc ?
I915_MAP_FORCE_WC);
if (IS_ERR(cmd)) {
err = PTR_ERR(cmd);
- goto out_pool;
+ goto err_pool;
}
batch = i915_vma_instance(pool->obj, vma->vm, NULL);
if (err)
goto skip_request;
- i915_vma_lock(batch);
+ assert_vma_held(batch);
err = i915_request_await_object(rq, batch->obj, false);
if (err == 0)
err = i915_vma_move_to_active(batch, rq, 0);
- i915_vma_unlock(batch);
if (err)
goto skip_request;
cache->rq = rq;
cache->rq_cmd = cmd;
cache->rq_size = 0;
+ cache->pool = pool;
/* Return with batch mapping (cmd) still pinned */
- goto out_pool;
+ return 0;
skip_request:
i915_request_set_error_once(rq, err);
i915_vma_unpin(batch);
err_unmap:
i915_gem_object_unpin_map(pool->obj);
-out_pool:
- intel_gt_buffer_pool_put(pool);
+err_pool:
+ eb->reloc_pool = pool;
return err;
}
u32 *cmd;
if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
- reloc_gpu_flush(cache);
+ reloc_gpu_flush(eb, cache);
if (unlikely(!cache->rq)) {
int err;
return addr + offset_in_page(offset);
}
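+/*
+ * Returns -EDEADLK if the ww locks must be backed off, 0 (false) if the
+ * GPU relocation path is unavailable and the caller should fall back to
+ * the CPU path, and true on success.
+ */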
-static bool __reloc_entry_gpu(struct i915_execbuffer *eb,
+static int __reloc_entry_gpu(struct i915_execbuffer *eb,
struct i915_vma *vma,
u64 offset,
u64 target_addr)
len = 3;
batch = reloc_gpu(eb, vma, len);
- if (IS_ERR(batch))
+ if (batch == ERR_PTR(-EDEADLK))
+ return -EDEADLK;
+ else if (IS_ERR(batch))
return false;
addr = gen8_canonical_addr(vma->node.start + offset);
return true;
}
-static bool reloc_entry_gpu(struct i915_execbuffer *eb,
+static int reloc_entry_gpu(struct i915_execbuffer *eb,
struct i915_vma *vma,
u64 offset,
u64 target_addr)
{
u64 target_addr = relocation_target(reloc, target);
u64 offset = reloc->offset;
+ int reloc_gpu = reloc_entry_gpu(eb, vma, offset, target_addr);
+
+ if (reloc_gpu < 0)
+ return reloc_gpu;
- if (!reloc_entry_gpu(eb, vma, offset, target_addr)) {
+ if (!reloc_gpu) {
bool wide = eb->reloc_cache.use_64bit_reloc;
void *vaddr;
urelocs += ARRAY_SIZE(stack);
} while (remain);
out:
- reloc_cache_reset(&eb->reloc_cache);
+ reloc_cache_reset(&eb->reloc_cache, eb);
return remain;
}
}
err = 0;
err:
- reloc_cache_reset(&eb->reloc_cache);
+ reloc_cache_reset(&eb->reloc_cache, eb);
return err;
}
goto out;
}
+ /* We may process another execbuffer during the unlock... */
+ eb_release_vmas(eb, false);
+ i915_gem_ww_ctx_fini(&eb->ww);
+
/*
* We take 3 passes through the slowpath.
*
flush_workqueue(eb->i915->mm.userptr_wq);
+ i915_gem_ww_ctx_init(&eb->ww, true);
if (err)
goto out;
- err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex);
+ /* reacquire the objects */
+repeat_validate:
+ err = eb_validate_vmas(eb);
if (err)
- goto out;
+ goto err;
+
+ GEM_BUG_ON(!eb->batch);
list_for_each_entry(ev, &eb->relocs, reloc_link) {
if (!have_copy) {
}
}
- mutex_unlock(&eb->i915->drm.struct_mutex);
+ if (err == -EDEADLK)
+ goto err;
+
if (err && !have_copy)
goto repeat;
*/
err:
+ if (err == -EDEADLK) {
+ eb_release_vmas(eb, false);
+ err = i915_gem_ww_ctx_backoff(&eb->ww);
+ if (!err)
+ goto repeat_validate;
+ }
+
if (err == -EAGAIN)
goto repeat;
{
int err;
- err = eb_lookup_vmas(eb);
- if (err)
- return err;
-
- if (!list_empty(&eb->unbound)) {
- err = eb_reserve(eb);
- if (err)
- return err;
- }
+retry:
+ err = eb_validate_vmas(eb);
+ if (err == -EAGAIN)
+ goto slow;
+ else if (err)
+ goto err;
/* The objects are in their final locations, apply the relocations. */
if (eb->args->flags & __EXEC_HAS_RELOC) {
break;
}
- if (err)
- return eb_relocate_parse_slow(eb);
+ if (err == -EDEADLK)
+ goto err;
+ else if (err)
+ goto slow;
+ }
+
+ if (!err)
+ err = eb_parse(eb);
+
+err:
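+ /*
+ * On ww contention, drop all object locks; the backoff relocks the
+ * contended object in slow mode before we retry the whole sequence.
+ */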
+ if (err == -EDEADLK) {
+ eb_release_vmas(eb, false);
+ err = i915_gem_ww_ctx_backoff(&eb->ww);
+ if (!err)
+ goto retry;
}
- return eb_parse(eb);
+ return err;
+
+slow:
+ err = eb_relocate_parse_slow(eb);
+ if (err)
+ /*
+ * If the user expects the execobject.offset and
+ * reloc.presumed_offset to be an exact match,
+ * as for using NO_RELOC, then we cannot update
+ * the execobject.offset until we have completed
+ * relocation.
+ */
+ eb->args->flags &= ~__EXEC_HAS_RELOC;
+
+ return err;
}
static int eb_move_to_gpu(struct i915_execbuffer *eb)
{
const unsigned int count = eb->buffer_count;
- struct ww_acquire_ctx acquire;
- unsigned int i;
+ unsigned int i = count;
int err = 0;
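+ /* Every object is already held locked under eb->ww by eb_validate_vmas(). */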
- ww_acquire_init(&acquire, &reservation_ww_class);
-
- for (i = 0; i < count; i++) {
- struct eb_vma *ev = &eb->vma[i];
- struct i915_vma *vma = ev->vma;
-
- err = ww_mutex_lock_interruptible(&vma->resv->lock, &acquire);
- if (err == -EDEADLK) {
- GEM_BUG_ON(i == 0);
- do {
- int j = i - 1;
-
- ww_mutex_unlock(&eb->vma[j].vma->resv->lock);
-
- swap(eb->vma[i], eb->vma[j]);
- } while (--i);
-
- err = ww_mutex_lock_slow_interruptible(&vma->resv->lock,
- &acquire);
- }
- if (err)
- break;
- }
- ww_acquire_done(&acquire);
-
while (i--) {
struct eb_vma *ev = &eb->vma[i];
struct i915_vma *vma = ev->vma;
if (err == 0)
err = i915_vma_move_to_active(vma, eb->request, flags);
-
- i915_vma_unlock(vma);
}
- ww_acquire_fini(&acquire);
if (unlikely(err))
goto err_skip;
if (err)
goto err_commit;
- err = dma_resv_lock_interruptible(pw->batch->resv, NULL);
- if (err)
- goto err_commit;
-
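+ /*
+ * The batch and shadow objects are already locked by the caller, so
+ * their reservations can be updated without further locking here.
+ */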
err = dma_resv_reserve_shared(pw->batch->resv, 1);
if (err)
- goto err_commit_unlock;
+ goto err_commit;
/* Wait for all writes (and relocs) into the batch to complete */
err = i915_sw_fence_await_reservation(&pw->base.chain,
pw->batch->resv, NULL, false,
0, I915_FENCE_GFP);
if (err < 0)
- goto err_commit_unlock;
+ goto err_commit;
/* Keep the batch alive and unwritten as we parse */
dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma);
- dma_resv_unlock(pw->batch->resv);
-
/* Force execution to wait for completion of the parser */
- dma_resv_lock(shadow->resv, NULL);
dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
- dma_resv_unlock(shadow->resv);
dma_fence_work_commit_imm(&pw->base);
return 0;
-err_commit_unlock:
- dma_resv_unlock(pw->batch->resv);
err_commit:
i915_sw_fence_set_error_once(&pw->base.chain, err);
dma_fence_work_commit_imm(&pw->base);
static int eb_parse(struct i915_execbuffer *eb)
{
struct drm_i915_private *i915 = eb->i915;
- struct intel_gt_buffer_pool_node *pool;
+ struct intel_gt_buffer_pool_node *pool = eb->batch_pool;
struct i915_vma *shadow, *trampoline;
unsigned int len;
int err;
len += I915_CMD_PARSER_TRAMPOLINE_SIZE;
}
- pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
- if (IS_ERR(pool))
- return PTR_ERR(pool);
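+ /*
+ * Keep the pool node in eb->batch_pool so a -EDEADLK retry can reuse
+ * it; it is released in the common cleanup path of the ioctl.
+ */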
+ if (!pool) {
+ pool = intel_gt_get_buffer_pool(eb->engine->gt, len);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
+ eb->batch_pool = pool;
+ }
+
+ err = i915_gem_object_lock(pool->obj, &eb->ww);
+ if (err)
+ goto err;
shadow = shadow_batch_pin(pool->obj, eb->context->vm, PIN_USER);
if (IS_ERR(shadow)) {
err_shadow:
i915_vma_unpin(shadow);
err:
- intel_gt_buffer_pool_put(pool);
return err;
}
eb.exec = exec;
eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
eb.vma[0].vma = NULL;
+ eb.reloc_pool = eb.batch_pool = NULL;
eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
reloc_cache_init(&eb.reloc_cache, eb.i915);
if (unlikely(err))
goto err_context;
+ err = eb_lookup_vmas(&eb);
+ if (err) {
+ eb_release_vmas(&eb, true);
+ goto err_engine;
+ }
+
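+ /* true: take object locks with interruptible waits */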
+ i915_gem_ww_ctx_init(&eb.ww, true);
+
err = eb_relocate_parse(&eb);
if (err) {
/*
goto err_vma;
}
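+ /*
+ * Everything is pinned and locked; ww_acquire_done() marks the end of
+ * the acquire phase, after which taking further ww locks is a bug.
+ */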
+ ww_acquire_done(&eb.ww.ctx);
+
/*
* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
vma = i915_gem_object_ggtt_pin(eb.batch->vma->obj, NULL, 0, 0, 0);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
- goto err_parse;
+ goto err_vma;
}
batch = vma;
* to explicitly hold another reference here.
*/
eb.request->batch = batch;
- if (batch->private)
- intel_gt_buffer_pool_mark_active(batch->private, eb.request);
+ if (eb.batch_pool)
+ intel_gt_buffer_pool_mark_active(eb.batch_pool, eb.request);
trace_i915_request_queue(eb.request, eb.batch_flags);
err = eb_submit(&eb, batch);
err_batch_unpin:
if (eb.batch_flags & I915_DISPATCH_SECURE)
i915_vma_unpin(batch);
-err_parse:
- if (batch->private)
- intel_gt_buffer_pool_put(batch->private);
err_vma:
- if (eb.exec)
- eb_release_vmas(&eb);
+ eb_release_vmas(&eb, true);
if (eb.trampoline)
i915_vma_unpin(eb.trampoline);
+ WARN_ON(err == -EDEADLK);
+ i915_gem_ww_ctx_fini(&eb.ww);
+
+ if (eb.batch_pool)
+ intel_gt_buffer_pool_put(eb.batch_pool);
+ if (eb.reloc_pool)
+ intel_gt_buffer_pool_put(eb.reloc_pool);
+err_engine:
eb_unpin_engine(&eb);
err_context:
i915_gem_context_put(eb.gem_context);