#include "i915_drv.h"
#include "gt/intel_gpu_commands.h"
+#include "gt/intel_lrc.h"
#include "gt/intel_ring.h"
+#include "gt/intel_gt_requests.h"
#include "gvt.h"
#include "i915_pvinfo.h"
#include "trace.h"
+#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_context.h"
+
#define INVALID_OP (~0U)
#define OP_LEN_MI 9
RING_BUFFER_INSTRUCTION,
BATCH_BUFFER_INSTRUCTION,
BATCH_BUFFER_2ND_LEVEL,
+ RING_BUFFER_CTX,
};
enum {
*/
int saved_buf_addr_type;
bool is_ctx_wa;
+ bool is_init_ctx;
const struct cmd_info *info;
return *cmd_ptr(s, index);
}
+/*
+ * Tell whether the parser is currently walking an init context, i.e. a
+ * context-image scan (buf_type RING_BUFFER_CTX) whose state was also
+ * flagged with is_init_ctx by the caller that set it up.
+ */
+static inline bool is_init_ctx(struct parser_exec_state *s)
+{
+	if (s->buf_type != RING_BUFFER_CTX)
+		return false;
+
+	return s->is_init_ctx;
+}
+
static void parser_exec_state_dump(struct parser_exec_state *s)
{
int cnt = 0;
gvt_dbg_cmd(" %s %s ip_gma(%08lx) ",
s->buf_type == RING_BUFFER_INSTRUCTION ?
- "RING_BUFFER" : "BATCH_BUFFER",
+ "RING_BUFFER" : ((s->buf_type == RING_BUFFER_CTX) ?
+ "CTX_BUFFER" : "BATCH_BUFFER"),
s->buf_addr_type == GTT_BUFFER ?
"GTT" : "PPGTT", s->ip_gma);
if (WARN_ON(s->ring_head == s->ring_tail))
return;
- if (s->buf_type == RING_BUFFER_INSTRUCTION) {
+ if (s->buf_type == RING_BUFFER_INSTRUCTION ||
+ s->buf_type == RING_BUFFER_CTX) {
unsigned long ring_top = s->ring_start + s->ring_size;
if (s->ring_head > s->ring_tail) {
*addr = val; \
} while (0)
-static bool is_shadowed_mmio(unsigned int offset)
-{
- bool ret = false;
-
- if ((offset == 0x2168) || /*BB current head register UDW */
- (offset == 0x2140) || /*BB current header register */
- (offset == 0x211c) || /*second BB header register UDW */
- (offset == 0x2114)) { /*second BB header register UDW */
- ret = true;
- }
- return ret;
-}
-
-static inline bool is_force_nonpriv_mmio(unsigned int offset)
-{
- return (offset >= 0x24d0 && offset < 0x2500);
-}
-
-static int force_nonpriv_reg_handler(struct parser_exec_state *s,
- unsigned int offset, unsigned int index, char *cmd)
-{
- struct intel_gvt *gvt = s->vgpu->gvt;
- unsigned int data;
- u32 ring_base;
- u32 nopid;
-
- if (!strcmp(cmd, "lri"))
- data = cmd_val(s, index + 1);
- else {
- gvt_err("Unexpected forcenonpriv 0x%x write from cmd %s\n",
- offset, cmd);
- return -EINVAL;
- }
-
- ring_base = s->engine->mmio_base;
- nopid = i915_mmio_reg_offset(RING_NOPID(ring_base));
-
- if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data) &&
- data != nopid) {
- gvt_err("Unexpected forcenonpriv 0x%x LRI write, value=0x%x\n",
- offset, data);
- patch_value(s, cmd_ptr(s, index), nopid);
- return 0;
- }
- return 0;
-}
-
/*
 * Report whether @offset falls inside one of the two register windows this
 * parser classifies as MOCS MMIO: 0xb020..0xb0a0 or 0xc800..0xcff8, with
 * both ends of each window included.
 */
static inline bool is_mocs_mmio(unsigned int offset)
{
	if (offset >= 0xb020 && offset <= 0xb0a0)
		return true;

	return offset >= 0xc800 && offset <= 0xcff8;
}
-static int mocs_cmd_reg_handler(struct parser_exec_state *s,
- unsigned int offset, unsigned int index)
-{
- if (!is_mocs_mmio(offset))
- return -EINVAL;
- vgpu_vreg(s->vgpu, offset) = cmd_val(s, index + 1);
- return 0;
-}
-
static int is_cmd_update_pdps(unsigned int offset,
struct parser_exec_state *s)
{
struct intel_vgpu *vgpu = s->vgpu;
struct intel_gvt *gvt = vgpu->gvt;
u32 ctx_sr_ctl;
+ u32 *vreg, vreg_old;
if (offset + 4 > gvt->device_info.mmio_size) {
gvt_vgpu_err("%s access to (%x) outside of MMIO range\n",
return -EFAULT;
}
+ if (is_init_ctx(s)) {
+ struct intel_gvt_mmio_info *mmio_info;
+
+ intel_gvt_mmio_set_cmd_accessible(gvt, offset);
+ mmio_info = intel_gvt_find_mmio_info(gvt, offset);
+ if (mmio_info && mmio_info->write)
+ intel_gvt_mmio_set_cmd_write_patch(gvt, offset);
+ return 0;
+ }
+
if (!intel_gvt_mmio_is_cmd_accessible(gvt, offset)) {
gvt_vgpu_err("%s access to non-render register (%x)\n",
cmd, offset);
return -EBADRQC;
}
- if (is_shadowed_mmio(offset)) {
- gvt_vgpu_err("found access of shadowed MMIO %x\n", offset);
- return 0;
+ if (!strncmp(cmd, "srm", 3) ||
+ !strncmp(cmd, "lrm", 3)) {
+ if (offset != i915_mmio_reg_offset(GEN8_L3SQCREG4) &&
+ offset != 0x21f0) {
+ gvt_vgpu_err("%s access to register (%x)\n",
+ cmd, offset);
+ return -EPERM;
+ } else
+ return 0;
}
- if (is_mocs_mmio(offset) &&
- mocs_cmd_reg_handler(s, offset, index))
- return -EINVAL;
+ if (!strncmp(cmd, "lrr-src", 7) ||
+ !strncmp(cmd, "lrr-dst", 7)) {
+ gvt_vgpu_err("not allowed cmd %s\n", cmd);
+ return -EPERM;
+ }
+
+ if (!strncmp(cmd, "pipe_ctrl", 9)) {
+ /* TODO: add LRI POST logic here */
+ return 0;
+ }
- if (is_force_nonpriv_mmio(offset) &&
- force_nonpriv_reg_handler(s, offset, index, cmd))
+ if (strncmp(cmd, "lri", 3))
return -EPERM;
+ /* below are all lri handlers */
+ vreg = &vgpu_vreg(s->vgpu, offset);
+ if (!intel_gvt_mmio_is_cmd_accessible(gvt, offset)) {
+ gvt_vgpu_err("%s access to non-render register (%x)\n",
+ cmd, offset);
+ return -EBADRQC;
+ }
+
+ if (is_cmd_update_pdps(offset, s) &&
+ cmd_pdp_mmio_update_handler(s, offset, index))
+ return -EINVAL;
+
if (offset == i915_mmio_reg_offset(DERRMR) ||
offset == i915_mmio_reg_offset(FORCEWAKE_MT)) {
/* Writing to HW VGT_PVINFO_PAGE offset will be discarded */
patch_value(s, cmd_ptr(s, index), VGT_PVINFO_PAGE);
}
- if (is_cmd_update_pdps(offset, s) &&
- cmd_pdp_mmio_update_handler(s, offset, index))
- return -EINVAL;
+ if (is_mocs_mmio(offset))
+ *vreg = cmd_val(s, index + 1);
+
+ vreg_old = *vreg;
+
+ if (intel_gvt_mmio_is_cmd_write_patch(gvt, offset)) {
+ u32 cmdval_new, cmdval;
+ struct intel_gvt_mmio_info *mmio_info;
+
+ cmdval = cmd_val(s, index + 1);
+
+ mmio_info = intel_gvt_find_mmio_info(gvt, offset);
+ if (!mmio_info) {
+ cmdval_new = cmdval;
+ } else {
+ u64 ro_mask = mmio_info->ro_mask;
+ int ret;
+
+ if (likely(!ro_mask))
+ ret = mmio_info->write(s->vgpu, offset,
+ &cmdval, 4);
+ else {
+ gvt_vgpu_err("try to write RO reg %x\n",
+ offset);
+ ret = -EBADRQC;
+ }
+ if (ret)
+ return ret;
+ cmdval_new = *vreg;
+ }
+ if (cmdval_new != cmdval)
+ patch_value(s, cmd_ptr(s, index+1), cmdval_new);
+ }
+
+ /* Only patch the command; restore the vreg value if the MMIO write handler changed it. */
+ *vreg = vreg_old;
/* TODO
* In order to let workload with inhibit context to generate
s->buf_type = BATCH_BUFFER_INSTRUCTION;
ret = ip_gma_set(s, s->ret_ip_gma_bb);
s->buf_addr_type = s->saved_buf_addr_type;
+ } else if (s->buf_type == RING_BUFFER_CTX) {
+ ret = ip_gma_set(s, s->ring_tail);
} else {
s->buf_type = RING_BUFFER_INSTRUCTION;
s->buf_addr_type = GTT_BUFFER;
gma_bottom = rb_start + rb_len;
while (s->ip_gma != gma_tail) {
- if (s->buf_type == RING_BUFFER_INSTRUCTION) {
+ if (s->buf_type == RING_BUFFER_INSTRUCTION ||
+ s->buf_type == RING_BUFFER_CTX) {
if (!(s->ip_gma >= rb_start) ||
!(s->ip_gma < gma_bottom)) {
gvt_vgpu_err("ip_gma %lx out of ring scope."
return 0;
}
+/*
+ * Generate dummy contexts by sending an empty request to the HW on every
+ * engine and letting the HW fill in the engine contexts.  These dummy
+ * contexts are used for initialization purposes only (updating the register
+ * whitelist), so they are referred to as "init contexts" here.
+ *
+ * Each filled-in context image is then parsed with is_init_ctx set.
+ *
+ * The function returns nothing.  gvt->is_reg_whitelist_updated is set only
+ * when every step for every engine succeeded, so a failed or partial scan
+ * is attempted again on the next call instead of being recorded as done.
+ */
+void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct drm_i915_private *dev_priv = gvt->gt->i915;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	const unsigned long start = LRC_STATE_PN * PAGE_SIZE;
+	struct i915_request *rq;
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct i915_request *requests[I915_NUM_ENGINES] = {};
+	bool is_ctx_pinned[I915_NUM_ENGINES] = {};
+	/* Initialised so that "out:" never reads garbage if no engine exists. */
+	int ret = 0;
+
+	if (gvt->is_reg_whitelist_updated)
+		return;
+
+	/* Submit one empty request per engine on its shadow context. */
+	for_each_engine(engine, &dev_priv->gt, id) {
+		ret = intel_context_pin(s->shadow[id]);
+		if (ret) {
+			gvt_vgpu_err("fail to pin shadow ctx\n");
+			goto out;
+		}
+		is_ctx_pinned[id] = true;
+
+		rq = i915_request_create(s->shadow[id]);
+		if (IS_ERR(rq)) {
+			gvt_vgpu_err("fail to alloc default request\n");
+			ret = -EIO;
+			goto out;
+		}
+		/* Extra reference so the request can be inspected below. */
+		requests[id] = i915_request_get(rq);
+		i915_request_add(rq);
+	}
+
+	if (intel_gt_wait_for_idle(&dev_priv->gt,
+				   I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+		ret = -EIO;
+		goto out;
+	}
+
+	/* scan init ctx to update cmd accessible list */
+	for_each_engine(engine, &dev_priv->gt, id) {
+		int size = engine->context_size - PAGE_SIZE;
+		struct drm_i915_gem_object *obj;
+		struct parser_exec_state state;
+		void *vaddr;
+		int err;
+
+		rq = requests[id];
+		GEM_BUG_ON(!i915_request_completed(rq));
+		GEM_BUG_ON(!intel_context_is_pinned(rq->context));
+		obj = rq->context->state->obj;
+
+		if (!obj) {
+			ret = -EIO;
+			goto out;
+		}
+
+		i915_gem_object_set_cache_coherency(obj,
+						    I915_CACHE_LLC);
+
+		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+		if (IS_ERR(vaddr)) {
+			gvt_err("failed to pin init ctx obj, ring=%d, err=%lx\n",
+				id, PTR_ERR(vaddr));
+			/*
+			 * Record the failure; otherwise ret would still be 0
+			 * and the whitelist would be flagged as updated.
+			 */
+			ret = PTR_ERR(vaddr);
+			goto out;
+		}
+
+		state.buf_type = RING_BUFFER_CTX;
+		state.buf_addr_type = GTT_BUFFER;
+		state.vgpu = vgpu;
+		state.engine = engine;
+		state.ring_start = 0;
+		state.ring_size = size;
+		state.ring_head = 0;
+		state.ring_tail = size;
+		state.rb_va = vaddr + start;
+		state.workload = NULL;
+		state.is_ctx_wa = false;
+		state.is_init_ctx = true;
+
+		/* skipping the first RING_CTX_SIZE(0x50) dwords */
+		err = ip_gma_set(&state, RING_CTX_SIZE);
+		if (err) {
+			i915_gem_object_unpin_map(obj);
+			ret = err;
+			goto out;
+		}
+
+		err = command_scan(&state, 0, size, 0, size);
+		if (err) {
+			gvt_err("Scan init ctx error\n");
+			/*
+			 * Keep scanning the remaining engines as before, but
+			 * remember the first failure so that a later engine's
+			 * success cannot mask it.
+			 */
+			if (!ret)
+				ret = err;
+		}
+
+		i915_gem_object_unpin_map(obj);
+	}
+
+out:
+	if (!ret)
+		gvt->is_reg_whitelist_updated = true;
+
+	/* Drop every request reference and context pin taken above. */
+	for (id = 0; id < I915_NUM_ENGINES ; id++) {
+		if (requests[id])
+			i915_request_put(requests[id]);
+
+		if (is_ctx_pinned[id])
+			intel_context_unpin(s->shadow[id]);
+	}
+}
+
+/*
+ * Run the command scanner over the engine-context part of the shadow
+ * context that belongs to @workload's engine.
+ *
+ * Returns 0 when the context is an inhibit context (nothing is scanned) or
+ * when the scan succeeds; otherwise the error reported by ip_gma_set() or
+ * command_scan(), after logging it.
+ */
+int intel_gvt_scan_engine_context(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct intel_context *ce =
+		vgpu->submission.shadow[workload->engine->id];
+	struct parser_exec_state s;
+	unsigned long gma_start, ctx_size;
+	int ret;
+
+	GEM_BUG_ON(atomic_read(&ce->pin_count) < 0);
+
+	/*
+	 * Only the ring context is loaded to HW for an inhibit context, so
+	 * there is no engine context to scan.
+	 */
+	if (is_inhibit_context(ce))
+		return 0;
+
+	ctx_size = workload->engine->context_size - PAGE_SIZE;
+	gma_start = i915_ggtt_offset(ce->state) + LRC_STATE_PN*PAGE_SIZE;
+
+	/* Describe the context image as a "ring" spanning ctx_size bytes. */
+	s.vgpu = workload->vgpu;
+	s.engine = workload->engine;
+	s.workload = workload;
+	s.buf_type = RING_BUFFER_CTX;
+	s.buf_addr_type = GTT_BUFFER;
+	s.rb_va = ce->lrc_reg_state;
+	s.ring_start = gma_start;
+	s.ring_size = ctx_size;
+	s.ring_head = gma_start;
+	s.ring_tail = gma_start + ctx_size;
+	s.is_ctx_wa = false;
+	s.is_init_ctx = false;
+
+	/*
+	 * Start past the first RING_CTX_SIZE(0x50) dwords: that part is the
+	 * ring context and is not scanned here.
+	 */
+	ret = ip_gma_set(&s, gma_start + RING_CTX_SIZE);
+	if (!ret)
+		ret = command_scan(&s, 0, ctx_size, gma_start, ctx_size);
+
+	if (ret)
+		gvt_vgpu_err("scan shadow ctx error\n");
+
+	return ret;
+}
+
static int init_cmd_table(struct intel_gvt *gvt)
{
unsigned int gen_type = intel_gvt_get_device_type(gvt);
memcpy(&vgpu_vreg(vgpu, offset), p_data, bytes);
}
-static struct intel_gvt_mmio_info *find_mmio_info(struct intel_gvt *gvt,
+struct intel_gvt_mmio_info *intel_gvt_find_mmio_info(struct intel_gvt *gvt,
unsigned int offset)
{
struct intel_gvt_mmio_info *e;
}
static int new_mmio_info(struct intel_gvt *gvt,
- u32 offset, u8 flags, u32 size,
+ u32 offset, u16 flags, u32 size,
u32 addr_mask, u32 ro_mask, u32 device,
gvt_mmio_func read, gvt_mmio_func write)
{
return -ENOMEM;
info->offset = i;
- p = find_mmio_info(gvt, info->offset);
+ p = intel_gvt_find_mmio_info(gvt, info->offset);
if (p) {
WARN(1, "dup mmio definition offset %x\n",
info->offset);
/* RING MODE */
#define RING_REG(base) _MMIO((base) + 0x29c)
- MMIO_RING_DFH(RING_REG, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL,
+ MMIO_RING_DFH(RING_REG, D_ALL,
+ F_MODE_MASK | F_CMD_ACCESS | F_CMD_WRITE_PATCH, NULL,
ring_mode_mmio_write);
#undef RING_REG
MMIO_DFH(_MMIO(0xb10c), D_BDW, F_CMD_ACCESS, NULL, NULL);
MMIO_D(_MMIO(0xb110), D_BDW);
- MMIO_F(_MMIO(0x24d0), 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS,
- NULL, force_nonpriv_write);
+ MMIO_F(_MMIO(0x24d0), 48, F_CMD_ACCESS | F_CMD_WRITE_PATCH, 0, 0,
+ D_BDW_PLUS, NULL, force_nonpriv_write);
MMIO_D(_MMIO(0x44484), D_BDW_PLUS);
MMIO_D(_MMIO(0x4448c), D_BDW_PLUS);
/*
* Normal tracked MMIOs.
*/
- mmio_info = find_mmio_info(gvt, offset);
+ mmio_info = intel_gvt_find_mmio_info(gvt, offset);
if (!mmio_info) {
gvt_dbg_mmio("untracked MMIO %08x len %d\n", offset, bytes);
goto default_rw;