Merge drm/drm-next into drm-intel-next-queued
author Rodrigo Vivi <rodrigo.vivi@intel.com>
Mon, 29 Jul 2019 15:51:48 +0000 (08:51 -0700)
committer Rodrigo Vivi <rodrigo.vivi@intel.com>
Mon, 29 Jul 2019 15:51:48 +0000 (08:51 -0700)
Catching up with 5.3-rc*

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
drivers/gpu/drm/i915/Makefile.header-test
drivers/gpu/drm/i915/i915_perf.c
drivers/gpu/drm/i915/intel_runtime_pm.h

@@@ -2,36 -2,21 +2,26 @@@
  # Copyright © 2019 Intel Corporation
  
  # Test the headers are compilable as standalone units
- header_test := \
+ header-test-$(CONFIG_DRM_I915_WERROR) := \
        i915_active_types.h \
        i915_debugfs.h \
        i915_drv.h \
 +      i915_fixed.h \
 +      i915_gem_gtt.h \
 +      i915_globals.h \
        i915_irq.h \
        i915_params.h \
        i915_priolist_types.h \
 +      i915_pvinfo.h \
        i915_reg.h \
        i915_scheduler_types.h \
 -      i915_timeline_types.h \
        i915_utils.h \
 +      i915_vgpu.h \
        intel_csr.h \
        intel_drv.h \
 +      intel_gvt.h \
        intel_pm.h \
        intel_runtime_pm.h \
        intel_sideband.h \
        intel_uncore.h \
        intel_wakeref.h
- quiet_cmd_header_test = HDRTEST $@
-       cmd_header_test = echo "\#include \"$(<F)\"" > $@
- header_test_%.c: %.h
-       $(call cmd,header_test)
- i915-$(CONFIG_DRM_I915_WERROR) += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h)))
- clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h)))
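
The driver-local HDRTEST rules removed above are superseded by the generic
kbuild header-test machinery new in 5.3, which is why only the list
assignment survives here. As a rough sketch (the generated file name is
illustrative), kbuild compiles each listed header through a one-line
translation unit:

	/* i915_drv.header_test.c (generated; name is illustrative) */
	#include "i915_drv.h"

so every header must build standalone, with warnings promoted to errors when
CONFIG_DRM_I915_WERROR is enabled.
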
  #include "gt/intel_lrc_reg.h"
  
  #include "i915_drv.h"
 -#include "i915_oa_hsw.h"
 -#include "i915_oa_bdw.h"
 -#include "i915_oa_chv.h"
 -#include "i915_oa_sklgt2.h"
 -#include "i915_oa_sklgt3.h"
 -#include "i915_oa_sklgt4.h"
 -#include "i915_oa_bxt.h"
 -#include "i915_oa_kblgt2.h"
 -#include "i915_oa_kblgt3.h"
 -#include "i915_oa_glk.h"
 -#include "i915_oa_cflgt2.h"
 -#include "i915_oa_cflgt3.h"
 -#include "i915_oa_cnl.h"
 -#include "i915_oa_icl.h"
 +#include "oa/i915_oa_hsw.h"
 +#include "oa/i915_oa_bdw.h"
 +#include "oa/i915_oa_chv.h"
 +#include "oa/i915_oa_sklgt2.h"
 +#include "oa/i915_oa_sklgt3.h"
 +#include "oa/i915_oa_sklgt4.h"
 +#include "oa/i915_oa_bxt.h"
 +#include "oa/i915_oa_kblgt2.h"
 +#include "oa/i915_oa_kblgt3.h"
 +#include "oa/i915_oa_glk.h"
 +#include "oa/i915_oa_cflgt2.h"
 +#include "oa/i915_oa_cflgt3.h"
 +#include "oa/i915_oa_cnl.h"
 +#include "oa/i915_oa_icl.h"
  
  /* HW requires this to be a power of two, between 128k and 16M, though driver
   * is currently generally designed assuming the largest 16M size is used such
  #define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)
  
  /* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
- static int zero;
- static int one = 1;
  static u32 i915_perf_stream_paranoid = true;
  
  /* The maximum exponent the hardware accepts is 63 (essentially it selects one
@@@ -1569,10 -1567,28 +1567,10 @@@ static void config_oa_regs(struct drm_i
        }
  }
  
 -static int hsw_enable_metric_set(struct i915_perf_stream *stream)
 +static void delay_after_mux(void)
  {
 -      struct drm_i915_private *dev_priv = stream->dev_priv;
 -      const struct i915_oa_config *oa_config = stream->oa_config;
 -
 -      /* PRM:
 -       *
 -       * OA unit is using “crclk” for its functionality. When trunk
 -       * level clock gating takes place, OA clock would be gated,
 -       * unable to count the events from non-render clock domain.
 -       * Render clock gating must be disabled when OA is enabled to
 -       * count the events from non-render domain. Unit level clock
 -       * gating for RCS should also be disabled.
 -       */
 -      I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
 -                                  ~GEN7_DOP_CLOCK_GATE_ENABLE));
 -      I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) |
 -                                GEN6_CSUNIT_CLOCK_GATE_DISABLE));
 -
 -      config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
 -
 -      /* It apparently takes a fairly long time for a new MUX
 +      /*
 +       * It apparently takes a fairly long time for a new MUX
         * configuration to be applied after these register writes.
         * This delay duration was derived empirically based on the
         * render_basic config but hopefully it covers the maximum
         * a delay at this location would mitigate any invalid reports.
         */
        usleep_range(15000, 20000);
 +}
 +
 +static int hsw_enable_metric_set(struct i915_perf_stream *stream)
 +{
 +      struct drm_i915_private *dev_priv = stream->dev_priv;
 +      const struct i915_oa_config *oa_config = stream->oa_config;
 +
 +      /*
 +       * PRM:
 +       *
 +       * OA unit is using “crclk” for its functionality. When trunk
 +       * level clock gating takes place, OA clock would be gated,
 +       * unable to count the events from non-render clock domain.
 +       * Render clock gating must be disabled when OA is enabled to
 +       * count the events from non-render domain. Unit level clock
 +       * gating for RCS should also be disabled.
 +       */
 +      I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
 +                                  ~GEN7_DOP_CLOCK_GATE_ENABLE));
 +      I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) |
 +                                GEN6_CSUNIT_CLOCK_GATE_DISABLE));
 +
 +      config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
 +      delay_after_mux();
  
        config_oa_regs(dev_priv, oa_config->b_counter_regs,
                       oa_config->b_counter_regs_len);
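
Factoring the settle delay into delay_after_mux() lets the HSW path above and
the gen8+ path in a later hunk share it; both now follow the same pattern:

	config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
	delay_after_mux(); /* give the new MUX configuration 15-20ms to settle */
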
@@@ -1636,27 -1628,6 +1634,27 @@@ static void hsw_disable_metric_set(stru
                                      ~GT_NOA_ENABLE));
  }
  
 +static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config,
 +                            i915_reg_t reg)
 +{
 +      u32 mmio = i915_mmio_reg_offset(reg);
 +      int i;
 +
 +      /*
 +       * This arbitrary default will select the 'EU FPU0 Pipeline
 +       * Active' event. In the future it's anticipated that there
 +       * will be an explicit 'No Event' we can select, but not yet...
 +       */
 +      if (!oa_config)
 +              return 0;
 +
 +      for (i = 0; i < oa_config->flex_regs_len; i++) {
 +              if (i915_mmio_reg_offset(oa_config->flex_regs[i].addr) == mmio)
 +                      return oa_config->flex_regs[i].value;
 +      }
 +
 +      return 0;
 +}
  /*
   * NB: It must always remain pointer safe to run this even if the OA unit
   * has been disabled.
@@@ -1690,138 -1661,33 +1688,138 @@@ gen8_update_reg_state_unlocked(struct i
                GEN8_OA_COUNTER_RESUME);
  
        for (i = 0; i < ARRAY_SIZE(flex_regs); i++) {
 -              u32 state_offset = ctx_flexeu0 + i * 2;
 -              u32 mmio = i915_mmio_reg_offset(flex_regs[i]);
 +              CTX_REG(reg_state, ctx_flexeu0 + i * 2, flex_regs[i],
 +                      oa_config_flex_reg(oa_config, flex_regs[i]));
 +      }
  
 -              /*
 -               * This arbitrary default will select the 'EU FPU0 Pipeline
 -               * Active' event. In the future it's anticipated that there
 -               * will be an explicit 'No Event' we can select, but not yet...
 -               */
 -              u32 value = 0;
 +      CTX_REG(reg_state,
 +              CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
 +              intel_sseu_make_rpcs(i915, &ce->sseu));
 +}
 +
 +struct flex {
 +      i915_reg_t reg;
 +      u32 offset;
 +      u32 value;
 +};
  
 -              if (oa_config) {
 -                      u32 j;
 +static int
 +gen8_store_flex(struct i915_request *rq,
 +              struct intel_context *ce,
 +              const struct flex *flex, unsigned int count)
 +{
 +      u32 offset;
 +      u32 *cs;
  
 -                      for (j = 0; j < oa_config->flex_regs_len; j++) {
 -                              if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
 -                                      value = oa_config->flex_regs[j].value;
 -                                      break;
 -                              }
 -                      }
 -              }
 +      cs = intel_ring_begin(rq, 4 * count);
 +      if (IS_ERR(cs))
 +              return PTR_ERR(cs);
 +
 +      offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE;
 +      do {
 +              *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
 +              *cs++ = offset + (flex->offset + 1) * sizeof(u32);
 +              *cs++ = 0;
 +              *cs++ = flex->value;
 +      } while (flex++, --count);
 +
 +      intel_ring_advance(rq, cs);
 +
 +      return 0;
 +}
 +
 +static int
 +gen8_load_flex(struct i915_request *rq,
 +             struct intel_context *ce,
 +             const struct flex *flex, unsigned int count)
 +{
 +      u32 *cs;
 +
 +      GEM_BUG_ON(!count || count > 63);
 +
 +      cs = intel_ring_begin(rq, 2 * count + 2);
 +      if (IS_ERR(cs))
 +              return PTR_ERR(cs);
 +
 +      *cs++ = MI_LOAD_REGISTER_IMM(count);
 +      do {
 +              *cs++ = i915_mmio_reg_offset(flex->reg);
 +              *cs++ = flex->value;
 +      } while (flex++, --count);
 +      *cs++ = MI_NOOP;
 +
 +      intel_ring_advance(rq, cs);
 +
 +      return 0;
 +}
  
 -              CTX_REG(reg_state, state_offset, flex_regs[i], value);
 +static int gen8_modify_context(struct intel_context *ce,
 +                             const struct flex *flex, unsigned int count)
 +{
 +      struct i915_request *rq;
 +      int err;
 +
 +      lockdep_assert_held(&ce->pin_mutex);
 +
 +      rq = i915_request_create(ce->engine->kernel_context);
 +      if (IS_ERR(rq))
 +              return PTR_ERR(rq);
 +
 +      /* Serialise with the remote context */
 +      err = intel_context_prepare_remote_request(ce, rq);
 +      if (err == 0)
 +              err = gen8_store_flex(rq, ce, flex, count);
 +
 +      i915_request_add(rq);
 +      return err;
 +}
 +
 +static int gen8_modify_self(struct intel_context *ce,
 +                          const struct flex *flex, unsigned int count)
 +{
 +      struct i915_request *rq;
 +      int err;
 +
 +      rq = i915_request_create(ce);
 +      if (IS_ERR(rq))
 +              return PTR_ERR(rq);
 +
 +      err = gen8_load_flex(rq, ce, flex, count);
 +
 +      i915_request_add(rq);
 +      return err;
 +}
 +
 +static int gen8_configure_context(struct i915_gem_context *ctx,
 +                                struct flex *flex, unsigned int count)
 +{
 +      struct i915_gem_engines_iter it;
 +      struct intel_context *ce;
 +      int err = 0;
 +
 +      for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
 +              GEM_BUG_ON(ce == ce->engine->kernel_context);
 +
 +              if (ce->engine->class != RENDER_CLASS)
 +                      continue;
 +
 +              err = intel_context_lock_pinned(ce);
 +              if (err)
 +                      break;
 +
 +              flex->value = intel_sseu_make_rpcs(ctx->i915, &ce->sseu);
 +
 +              /* Otherwise OA settings will be set upon first use */
 +              if (intel_context_is_pinned(ce))
 +                      err = gen8_modify_context(ce, flex, count);
 +
 +              intel_context_unlock_pinned(ce);
 +              if (err)
 +                      break;
        }
 +      i915_gem_context_unlock_engines(ctx);
  
 -      CTX_REG(reg_state,
 -              CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
 -              intel_sseu_make_rpcs(i915, &ce->sseu));
 +      return err;
  }
  
  /*
   *
   * Note: it's only the RCS/Render context that has any OA state.
   */
 -static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 +static int gen8_configure_all_contexts(struct drm_i915_private *i915,
                                       const struct i915_oa_config *oa_config)
  {
 -      unsigned int map_type = i915_coherent_map_type(dev_priv);
 +      /* The MMIO offsets for Flex EU registers aren't contiguous */
 +      const u32 ctx_flexeu0 = i915->perf.oa.ctx_flexeu0_offset;
 +#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N))
 +      struct flex regs[] = {
 +              {
 +                      GEN8_R_PWR_CLK_STATE,
 +                      CTX_R_PWR_CLK_STATE,
 +              },
 +              {
 +                      GEN8_OACTXCONTROL,
 +                      i915->perf.oa.ctx_oactxctrl_offset,
 +                      ((i915->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
 +                       (i915->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
 +                       GEN8_OA_COUNTER_RESUME)
 +              },
 +              { EU_PERF_CNTL0, ctx_flexeuN(0) },
 +              { EU_PERF_CNTL1, ctx_flexeuN(1) },
 +              { EU_PERF_CNTL2, ctx_flexeuN(2) },
 +              { EU_PERF_CNTL3, ctx_flexeuN(3) },
 +              { EU_PERF_CNTL4, ctx_flexeuN(4) },
 +              { EU_PERF_CNTL5, ctx_flexeuN(5) },
 +              { EU_PERF_CNTL6, ctx_flexeuN(6) },
 +      };
 +#undef ctx_flexeuN
 +      struct intel_engine_cs *engine;
        struct i915_gem_context *ctx;
 -      struct i915_request *rq;
 -      int ret;
 +      enum intel_engine_id id;
 +      int i;
 +
 +      for (i = 2; i < ARRAY_SIZE(regs); i++)
 +              regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
  
 -      lockdep_assert_held(&dev_priv->drm.struct_mutex);
 +      lockdep_assert_held(&i915->drm.struct_mutex);
  
        /*
         * The OA register config is setup through the context image. This image
         * this might leave small interval of time where the OA unit is
         * configured at an invalid sampling period.
         *
 -       * So far the best way to work around this issue seems to be draining
 -       * the GPU from any submitted work.
 +       * Note that since we emit all requests from a single ring, there
 +       * is still an implicit global barrier here that may cause a high
 +       * priority context to wait for an otherwise independent low priority
 +       * context. Contexts idle at the time of reconfiguration are not
 +       * trapped behind the barrier.
         */
 -      ret = i915_gem_wait_for_idle(dev_priv,
 -                                   I915_WAIT_LOCKED,
 -                                   MAX_SCHEDULE_TIMEOUT);
 -      if (ret)
 -              return ret;
 -
 -      /* Update all contexts now that we've stalled the submission. */
 -      list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
 -              struct i915_gem_engines_iter it;
 -              struct intel_context *ce;
 -
 -              for_each_gem_engine(ce,
 -                                  i915_gem_context_lock_engines(ctx),
 -                                  it) {
 -                      u32 *regs;
 -
 -                      if (ce->engine->class != RENDER_CLASS)
 -                              continue;
 -
 -                      /* OA settings will be set upon first use */
 -                      if (!ce->state)
 -                              continue;
 -
 -                      regs = i915_gem_object_pin_map(ce->state->obj,
 -                                                     map_type);
 -                      if (IS_ERR(regs)) {
 -                              i915_gem_context_unlock_engines(ctx);
 -                              return PTR_ERR(regs);
 -                      }
 +      list_for_each_entry(ctx, &i915->contexts.list, link) {
 +              int err;
  
 -                      ce->state->obj->mm.dirty = true;
 -                      regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
 -
 -                      gen8_update_reg_state_unlocked(ce, regs, oa_config);
 +              if (ctx == i915->kernel_context)
 +                      continue;
  
 -                      i915_gem_object_unpin_map(ce->state->obj);
 -              }
 -              i915_gem_context_unlock_engines(ctx);
 +              err = gen8_configure_context(ctx, regs, ARRAY_SIZE(regs));
 +              if (err)
 +                      return err;
        }
  
        /*
 -       * Apply the configuration by doing one context restore of the edited
 -       * context image.
 +       * After updating all other contexts, we need to modify ourselves.
 +       * If we don't modify the kernel_context, we do not get events while
 +       * idle.
         */
 -      rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context);
 -      if (IS_ERR(rq))
 -              return PTR_ERR(rq);
 +      for_each_engine(engine, i915, id) {
 +              struct intel_context *ce = engine->kernel_context;
 +              int err;
  
 -      i915_request_add(rq);
 +              if (engine->class != RENDER_CLASS)
 +                      continue;
 +
 +              regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
 +
 +              err = gen8_modify_self(ce, regs, ARRAY_SIZE(regs));
 +              if (err)
 +                      return err;
 +      }
  
        return 0;
  }
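
In outline, the rewrite above replaces the global wait-for-idle barrier with
ordinary requests; a sketch of the control flow as implemented:

	/*
	 * gen8_configure_all_contexts():
	 *   for each user context:
	 *     gen8_configure_context()
	 *       -> gen8_modify_context(): emit MI_STORE_DWORD_IMM from the
	 *          engine's kernel context into the pinned target's saved
	 *          context image
	 *   for each render engine:
	 *     gen8_modify_self(): emit MI_LOAD_REGISTER_IMM on the kernel
	 *       context itself, so OA events are still produced while idle
	 */
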
@@@ -1979,7 -1835,6 +1977,7 @@@ static int gen8_enable_metric_set(struc
                return ret;
  
        config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
 +      delay_after_mux();
  
        config_oa_regs(dev_priv, oa_config->b_counter_regs,
                       oa_config->b_counter_regs_len);
@@@ -2660,9 -2515,6 +2658,9 @@@ static int i915_perf_release(struct ino
        i915_perf_destroy_locked(stream);
        mutex_unlock(&dev_priv->perf.lock);
  
 +      /* Release the reference the perf stream kept on the driver. */
 +      drm_dev_put(&dev_priv->drm);
 +
        return 0;
  }
  
@@@ -2798,11 -2650,6 +2796,11 @@@ i915_perf_open_ioctl_locked(struct drm_
        if (!(param->flags & I915_PERF_FLAG_DISABLED))
                i915_perf_enable_locked(stream);
  
 +      /* Take a reference on the driver that will be kept with stream_fd
 +       * until its release.
 +       */
 +      drm_dev_get(&dev_priv->drm);
 +
        return stream_fd;
  
  err_open:
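
The two perf hunks above pair up: the drm_dev_get() taken on a successful
open is the reference dropped by drm_dev_put() in i915_perf_release(), so the
device stays alive exactly as long as the stream fd does:

	/* open:  */ drm_dev_get(&dev_priv->drm); /* held by stream_fd */
	/* close: */ drm_dev_put(&dev_priv->drm); /* from i915_perf_release() */
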
@@@ -3517,8 -3364,8 +3515,8 @@@ static struct ctl_table oa_table[] = 
         .maxlen = sizeof(i915_perf_stream_paranoid),
         .mode = 0644,
         .proc_handler = proc_dointvec_minmax,
-        .extra1 = &zero,
-        .extra2 = &one,
+        .extra1 = SYSCTL_ZERO,
+        .extra2 = SYSCTL_ONE,
         },
        {
         .procname = "oa_max_sample_rate",
         .maxlen = sizeof(i915_oa_max_sample_rate),
         .mode = 0644,
         .proc_handler = proc_dointvec_minmax,
-        .extra1 = &zero,
+        .extra1 = SYSCTL_ZERO,
         .extra2 = &oa_sample_rate_hard_limit,
         },
        {}
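
SYSCTL_ZERO and SYSCTL_ONE are shared bounds constants added to the core
sysctl code in 5.3; they are what let the earlier hunk drop the driver-local
"static int zero; static int one = 1;" pair. The knob itself is unchanged and
remains runtime-tunable:

	# echo 0 > /proc/sys/dev/i915/perf_stream_paranoid
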
@@@ -3630,13 -3477,9 +3628,13 @@@ void i915_perf_init(struct drm_i915_pri
                        dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set;
                        dev_priv->perf.oa.ops.disable_metric_set = gen10_disable_metric_set;
  
 -                      dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
 -                      dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;
 -
 +                      if (IS_GEN(dev_priv, 10)) {
 +                              dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
 +                              dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;
 +                      } else {
 +                              dev_priv->perf.oa.ctx_oactxctrl_offset = 0x124;
 +                              dev_priv->perf.oa.ctx_flexeu0_offset = 0x78e;
 +                      }
                        dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
                }
        }
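
The new branch pins down the differing context-image layouts (offsets in
dwords into the LRC state; the else path here is gen11):

	/*
	 * gen10: OACTXCONTROL 0x128, EU_PERF_CNTL0 0x3de
	 * gen11: OACTXCONTROL 0x124, EU_PERF_CNTL0 0x78e
	 */
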
@@@ -45,7 -45,7 +45,7 @@@ enum i915_drm_suspend_mode 
   * to be disabled. This shouldn't happen and we'll print some error messages in
   * case it happens.
   *
-  * For more, read the Documentation/power/runtime_pm.txt.
+  * For more, read the Documentation/power/runtime_pm.rst.
   */
  struct intel_runtime_pm {
        atomic_t wakeref_count;
@@@ -173,7 -173,7 +173,7 @@@ enable_rpm_wakeref_asserts(struct intel
  void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm);
  void intel_runtime_pm_enable(struct intel_runtime_pm *rpm);
  void intel_runtime_pm_disable(struct intel_runtime_pm *rpm);
 -void intel_runtime_pm_cleanup(struct intel_runtime_pm *rpm);
 +void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm);
  
  intel_wakeref_t intel_runtime_pm_get(struct intel_runtime_pm *rpm);
  intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm);
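
For context on the intel_runtime_pm_driver_release() rename above: the
wakeref API is otherwise unchanged. A minimal usage sketch, assuming the
tracked put variant declared elsewhere in this header:

	intel_wakeref_t wakeref;

	wakeref = intel_runtime_pm_get(rpm);	/* wake the device */
	/* ... access hardware ... */
	intel_runtime_pm_put(rpm, wakeref);	/* drop and, with
						 * CONFIG_DRM_I915_DEBUG_RUNTIME_PM,
						 * untrack the wakeref */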