Merge branches 'pm-cpufreq', 'pm-sleep' and 'pm-em'
author Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Fri, 10 Sep 2021 18:26:08 +0000 (20:26 +0200)
committer Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Fri, 10 Sep 2021 18:26:08 +0000 (20:26 +0200)
* pm-cpufreq:
  cpufreq: intel_pstate: hybrid: Rework HWP calibration
  ACPI: CPPC: Introduce cppc_get_nominal_perf()

* pm-sleep:
  PM: sleep: core: Avoid setting power.must_resume to false
  PM: sleep: wakeirq: drop useless parameter from dev_pm_attach_wake_irq()

* pm-em:
  Documentation: power: include kernel-doc in Energy Model doc
  PM: EM: fix kernel-doc comments

Documentation/power/energy-model.rst
drivers/acpi/cppc_acpi.c
drivers/base/power/main.c
drivers/base/power/wakeirq.c
drivers/cpufreq/intel_pstate.c
include/acpi/cppc_acpi.h
include/linux/energy_model.h

index 60ac091..8a2788a 100644 (file)
@@ -101,8 +101,7 @@ subsystems which use EM might rely on this flag to check if all EM devices use
 the same scale. If there are different scales, these subsystems might decide
 to: return warning/error, stop working or panic.
 See Section 3. for an example of driver implementing this
-callback, and kernel/power/energy_model.c for further documentation on this
-API.
+callback, or Section 2.4 for further documentation on this API
 
 
 2.3 Accessing performance domains
@@ -123,7 +122,17 @@ em_cpu_energy() API. The estimation is performed assuming that the schedutil
 CPUfreq governor is in use in case of CPU device. Currently this calculation is
 not provided for other type of devices.
 
-More details about the above APIs can be found in include/linux/energy_model.h.
+More details about the above APIs can be found in ``<linux/energy_model.h>``
+or in Section 2.4
+
+
+2.4 Description details of this API
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. kernel-doc:: include/linux/energy_model.h
+   :internal:
+
+.. kernel-doc:: kernel/power/energy_model.c
+   :export:
 
 
 3. Example driver
index a4d4eeb..bd48210 100644 (file)
@@ -1008,23 +1008,14 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
        return ret_val;
 }
 
-/**
- * cppc_get_desired_perf - Get the value of desired performance register.
- * @cpunum: CPU from which to get desired performance.
- * @desired_perf: address of a variable to store the returned desired performance
- *
- * Return: 0 for success, -EIO otherwise.
- */
-int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
+static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf)
 {
        struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum);
-       int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
-       struct cpc_register_resource *desired_reg;
-       struct cppc_pcc_data *pcc_ss_data = NULL;
-
-       desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF];
+       struct cpc_register_resource *reg = &cpc_desc->cpc_regs[reg_idx];
 
-       if (CPC_IN_PCC(desired_reg)) {
+       if (CPC_IN_PCC(reg)) {
+               int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
+               struct cppc_pcc_data *pcc_ss_data = NULL;
                int ret = 0;
 
                if (pcc_ss_id < 0)
@@ -1035,7 +1026,7 @@ int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
                down_write(&pcc_ss_data->pcc_lock);
 
                if (send_pcc_cmd(pcc_ss_id, CMD_READ) >= 0)
-                       cpc_read(cpunum, desired_reg, desired_perf);
+                       cpc_read(cpunum, reg, perf);
                else
                        ret = -EIO;
 
@@ -1044,12 +1035,36 @@ int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
                return ret;
        }
 
-       cpc_read(cpunum, desired_reg, desired_perf);
+       cpc_read(cpunum, reg, perf);
 
        return 0;
 }
+
+/**
+ * cppc_get_desired_perf - Get the desired performance register value.
+ * @cpunum: CPU from which to get desired performance.
+ * @desired_perf: Return address.
+ *
+ * Return: 0 for success, -EIO otherwise.
+ */
+int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
+{
+       return cppc_get_perf(cpunum, DESIRED_PERF, desired_perf);
+}
 EXPORT_SYMBOL_GPL(cppc_get_desired_perf);
 
+/**
+ * cppc_get_nominal_perf - Get the nominal performance register value.
+ * @cpunum: CPU from which to get nominal performance.
+ * @nominal_perf: Return address.
+ *
+ * Return: 0 for success, -EIO otherwise.
+ */
+int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
+{
+       return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf);
+}
+
 /**
  * cppc_get_perf_caps - Get a CPU's performance capabilities.
  * @cpunum: CPU from which to get capabilities info.
index d568772..cbea78e 100644 (file)
@@ -1642,7 +1642,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
        }
 
        dev->power.may_skip_resume = true;
-       dev->power.must_resume = false;
+       dev->power.must_resume = !dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME);
 
        dpm_watchdog_set(&wd, dev);
        device_lock(dev);
index 3bad326..b91a3a9 100644 (file)
 /**
  * dev_pm_attach_wake_irq - Attach device interrupt as a wake IRQ
  * @dev: Device entry
- * @irq: Device wake-up capable interrupt
  * @wirq: Wake irq specific data
  *
- * Internal function to attach either a device IO interrupt or a
- * dedicated wake-up interrupt as a wake IRQ.
+ * Internal function to attach a dedicated wake-up interrupt as a wake IRQ.
  */
-static int dev_pm_attach_wake_irq(struct device *dev, int irq,
-                                 struct wake_irq *wirq)
+static int dev_pm_attach_wake_irq(struct device *dev, struct wake_irq *wirq)
 {
        unsigned long flags;
 
@@ -65,7 +62,7 @@ int dev_pm_set_wake_irq(struct device *dev, int irq)
        wirq->dev = dev;
        wirq->irq = irq;
 
-       err = dev_pm_attach_wake_irq(dev, irq, wirq);
+       err = dev_pm_attach_wake_irq(dev, wirq);
        if (err)
                kfree(wirq);
 
@@ -196,7 +193,7 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
        if (err)
                goto err_free_name;
 
-       err = dev_pm_attach_wake_irq(dev, irq, wirq);
+       err = dev_pm_attach_wake_irq(dev, wirq);
        if (err)
                goto err_free_irq;
 
index 2d83a9f..1097f82 100644 (file)
@@ -268,6 +268,7 @@ static struct cpudata **all_cpu_data;
  * @get_min:           Callback to get minimum P state
  * @get_turbo:         Callback to get turbo P state
  * @get_scaling:       Callback to get frequency scaling factor
+ * @get_cpu_scaling:   Get frequency scaling factor for a given cpu
  * @get_aperf_mperf_shift: Callback to get the APERF vs MPERF frequency difference
  * @get_val:           Callback to convert P state to actual MSR write value
  * @get_vid:           Callback to get VID data for Atom platforms
@@ -281,6 +282,7 @@ struct pstate_funcs {
        int (*get_min)(void);
        int (*get_turbo)(void);
        int (*get_scaling)(void);
+       int (*get_cpu_scaling)(int cpu);
        int (*get_aperf_mperf_shift)(void);
        u64 (*get_val)(struct cpudata*, int pstate);
        void (*get_vid)(struct cpudata *);
@@ -384,6 +386,15 @@ static int intel_pstate_get_cppc_guaranteed(int cpu)
        return cppc_perf.nominal_perf;
 }
 
+static u32 intel_pstate_cppc_nominal(int cpu)
+{
+       u64 nominal_perf;
+
+       if (cppc_get_nominal_perf(cpu, &nominal_perf))
+               return 0;
+
+       return nominal_perf;
+}
 #else /* CONFIG_ACPI_CPPC_LIB */
 static inline void intel_pstate_set_itmt_prio(int cpu)
 {
@@ -470,20 +481,6 @@ static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
 
        acpi_processor_unregister_performance(policy->cpu);
 }
-
-static bool intel_pstate_cppc_perf_valid(u32 perf, struct cppc_perf_caps *caps)
-{
-       return perf && perf <= caps->highest_perf && perf >= caps->lowest_perf;
-}
-
-static bool intel_pstate_cppc_perf_caps(struct cpudata *cpu,
-                                       struct cppc_perf_caps *caps)
-{
-       if (cppc_get_perf_caps(cpu->cpu, caps))
-               return false;
-
-       return caps->highest_perf && caps->lowest_perf <= caps->highest_perf;
-}
 #else /* CONFIG_ACPI */
 static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 {
@@ -506,15 +503,8 @@ static inline int intel_pstate_get_cppc_guaranteed(int cpu)
 }
 #endif /* CONFIG_ACPI_CPPC_LIB */
 
-static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu)
-{
-       pr_debug("CPU%d: Using PERF_CTL scaling for HWP\n", cpu->cpu);
-
-       cpu->pstate.scaling = cpu->pstate.perf_ctl_scaling;
-}
-
 /**
- * intel_pstate_hybrid_hwp_calibrate - Calibrate HWP performance levels.
+ * intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels.
  * @cpu: Target CPU.
  *
  * On hybrid processors, HWP may expose more performance levels than there are
@@ -522,115 +512,46 @@ static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu)
  * scaling factor between HWP performance levels and CPU frequency will be less
  * than the scaling factor between P-state values and CPU frequency.
  *
- * In that case, the scaling factor between HWP performance levels and CPU
- * frequency needs to be determined which can be done with the help of the
- * observation that certain HWP performance levels should correspond to certain
- * P-states, like for example the HWP highest performance should correspond
- * to the maximum turbo P-state of the CPU.
+ * In that case, adjust the CPU parameters used in computations accordingly.
  */
-static void intel_pstate_hybrid_hwp_calibrate(struct cpudata *cpu)
+static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu)
 {
        int perf_ctl_max_phys = cpu->pstate.max_pstate_physical;
        int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
        int perf_ctl_turbo = pstate_funcs.get_turbo();
        int turbo_freq = perf_ctl_turbo * perf_ctl_scaling;
-       int perf_ctl_max = pstate_funcs.get_max();
-       int max_freq = perf_ctl_max * perf_ctl_scaling;
-       int scaling = INT_MAX;
-       int freq;
+       int scaling = cpu->pstate.scaling;
 
        pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys);
-       pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, perf_ctl_max);
+       pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, pstate_funcs.get_max());
        pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo);
        pr_debug("CPU%d: perf_ctl_scaling = %d\n", cpu->cpu, perf_ctl_scaling);
-
        pr_debug("CPU%d: HWP_CAP guaranteed = %d\n", cpu->cpu, cpu->pstate.max_pstate);
        pr_debug("CPU%d: HWP_CAP highest = %d\n", cpu->cpu, cpu->pstate.turbo_pstate);
-
-#ifdef CONFIG_ACPI
-       if (IS_ENABLED(CONFIG_ACPI_CPPC_LIB)) {
-               struct cppc_perf_caps caps;
-
-               if (intel_pstate_cppc_perf_caps(cpu, &caps)) {
-                       if (intel_pstate_cppc_perf_valid(caps.nominal_perf, &caps)) {
-                               pr_debug("CPU%d: Using CPPC nominal\n", cpu->cpu);
-
-                               /*
-                                * If the CPPC nominal performance is valid, it
-                                * can be assumed to correspond to cpu_khz.
-                                */
-                               if (caps.nominal_perf == perf_ctl_max_phys) {
-                                       intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
-                                       return;
-                               }
-                               scaling = DIV_ROUND_UP(cpu_khz, caps.nominal_perf);
-                       } else if (intel_pstate_cppc_perf_valid(caps.guaranteed_perf, &caps)) {
-                               pr_debug("CPU%d: Using CPPC guaranteed\n", cpu->cpu);
-
-                               /*
-                                * If the CPPC guaranteed performance is valid,
-                                * it can be assumed to correspond to max_freq.
-                                */
-                               if (caps.guaranteed_perf == perf_ctl_max) {
-                                       intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
-                                       return;
-                               }
-                               scaling = DIV_ROUND_UP(max_freq, caps.guaranteed_perf);
-                       }
-               }
-       }
-#endif
-       /*
-        * If using the CPPC data to compute the HWP-to-frequency scaling factor
-        * doesn't work, use the HWP_CAP gauranteed perf for this purpose with
-        * the assumption that it corresponds to max_freq.
-        */
-       if (scaling > perf_ctl_scaling) {
-               pr_debug("CPU%d: Using HWP_CAP guaranteed\n", cpu->cpu);
-
-               if (cpu->pstate.max_pstate == perf_ctl_max) {
-                       intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
-                       return;
-               }
-               scaling = DIV_ROUND_UP(max_freq, cpu->pstate.max_pstate);
-               if (scaling > perf_ctl_scaling) {
-                       /*
-                        * This should not happen, because it would mean that
-                        * the number of HWP perf levels was less than the
-                        * number of P-states, so use the PERF_CTL scaling in
-                        * that case.
-                        */
-                       pr_debug("CPU%d: scaling (%d) out of range\n", cpu->cpu,
-                               scaling);
-
-                       intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
-                       return;
-               }
-       }
+       pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling);
 
        /*
-        * If the product of the HWP performance scaling factor obtained above
-        * and the HWP_CAP highest performance is greater than the maximum turbo
-        * frequency corresponding to the pstate_funcs.get_turbo() return value,
-        * the scaling factor is too high, so recompute it so that the HWP_CAP
-        * highest performance corresponds to the maximum turbo frequency.
+        * If the product of the HWP performance scaling factor and the HWP_CAP
+        * highest performance is greater than the maximum turbo frequency
+        * corresponding to the pstate_funcs.get_turbo() return value, the
+        * scaling factor is too high, so recompute it to make the HWP_CAP
+        * highest performance correspond to the maximum turbo frequency.
         */
        if (turbo_freq < cpu->pstate.turbo_pstate * scaling) {
-               pr_debug("CPU%d: scaling too high (%d)\n", cpu->cpu, scaling);
-
                cpu->pstate.turbo_freq = turbo_freq;
                scaling = DIV_ROUND_UP(turbo_freq, cpu->pstate.turbo_pstate);
-       }
+               cpu->pstate.scaling = scaling;
 
-       cpu->pstate.scaling = scaling;
-
-       pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling);
+               pr_debug("CPU%d: refined HWP-to-frequency scaling factor: %d\n",
+                        cpu->cpu, scaling);
+       }
 
        cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling,
                                         perf_ctl_scaling);
 
-       freq = perf_ctl_max_phys * perf_ctl_scaling;
-       cpu->pstate.max_pstate_physical = DIV_ROUND_UP(freq, scaling);
+       cpu->pstate.max_pstate_physical =
+                       DIV_ROUND_UP(perf_ctl_max_phys * perf_ctl_scaling,
+                                    scaling);
 
        cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling;
        /*
@@ -1861,6 +1782,38 @@ static int knl_get_turbo_pstate(void)
        return ret;
 }
 
+#ifdef CONFIG_ACPI_CPPC_LIB
+static u32 hybrid_ref_perf;
+
+static int hybrid_get_cpu_scaling(int cpu)
+{
+       return DIV_ROUND_UP(core_get_scaling() * hybrid_ref_perf,
+                           intel_pstate_cppc_nominal(cpu));
+}
+
+static void intel_pstate_cppc_set_cpu_scaling(void)
+{
+       u32 min_nominal_perf = U32_MAX;
+       int cpu;
+
+       for_each_present_cpu(cpu) {
+               u32 nominal_perf = intel_pstate_cppc_nominal(cpu);
+
+               if (nominal_perf && nominal_perf < min_nominal_perf)
+                       min_nominal_perf = nominal_perf;
+       }
+
+       if (min_nominal_perf < U32_MAX) {
+               hybrid_ref_perf = min_nominal_perf;
+               pstate_funcs.get_cpu_scaling = hybrid_get_cpu_scaling;
+       }
+}
+#else
+static inline void intel_pstate_cppc_set_cpu_scaling(void)
+{
+}
+#endif /* CONFIG_ACPI_CPPC_LIB */
+
 static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
 {
        trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
@@ -1889,10 +1842,8 @@ static void intel_pstate_max_within_limits(struct cpudata *cpu)
 
 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 {
-       bool hybrid_cpu = boot_cpu_has(X86_FEATURE_HYBRID_CPU);
        int perf_ctl_max_phys = pstate_funcs.get_max_physical();
-       int perf_ctl_scaling = hybrid_cpu ? cpu_khz / perf_ctl_max_phys :
-                                           pstate_funcs.get_scaling();
+       int perf_ctl_scaling = pstate_funcs.get_scaling();
 
        cpu->pstate.min_pstate = pstate_funcs.get_min();
        cpu->pstate.max_pstate_physical = perf_ctl_max_phys;
@@ -1901,10 +1852,13 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
        if (hwp_active && !hwp_mode_bdw) {
                __intel_pstate_get_hwp_cap(cpu);
 
-               if (hybrid_cpu)
-                       intel_pstate_hybrid_hwp_calibrate(cpu);
-               else
+               if (pstate_funcs.get_cpu_scaling) {
+                       cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu);
+                       if (cpu->pstate.scaling != perf_ctl_scaling)
+                               intel_pstate_hybrid_hwp_adjust(cpu);
+               } else {
                        cpu->pstate.scaling = perf_ctl_scaling;
+               }
        } else {
                cpu->pstate.scaling = perf_ctl_scaling;
                cpu->pstate.max_pstate = pstate_funcs.get_max();
@@ -3276,6 +3230,9 @@ static int __init intel_pstate_init(void)
                        if (!default_driver)
                                default_driver = &intel_pstate;
 
+                       if (boot_cpu_has(X86_FEATURE_HYBRID_CPU))
+                               intel_pstate_cppc_set_cpu_scaling();
+
                        goto hwp_cpu_matched;
                }
        } else {
index 9f4985b..bc159a9 100644 (file)
@@ -135,6 +135,7 @@ struct cppc_cpudata {
 
 #ifdef CONFIG_ACPI_CPPC_LIB
 extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf);
+extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf);
 extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs);
 extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
 extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps);
@@ -149,6 +150,10 @@ static inline int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
 {
        return -ENOTSUPP;
 }
+static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
+{
+       return -ENOTSUPP;
+}
 static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
 {
        return -ENOTSUPP;
index 1834752..39dcadd 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/types.h>
 
 /**
- * em_perf_state - Performance state of a performance domain
+ * struct em_perf_state - Performance state of a performance domain
  * @frequency: The frequency in KHz, for consistency with CPUFreq
  * @power:     The power consumed at this level (by 1 CPU or by a registered
  *             device). It can be a total power: static and dynamic.
@@ -25,7 +25,7 @@ struct em_perf_state {
 };
 
 /**
- * em_perf_domain - Performance domain
+ * struct em_perf_domain - Performance domain
  * @table:             List of performance states, in ascending order
  * @nr_perf_states:    Number of performance states
  * @milliwatts:                Flag indicating the power values are in milli-Watts
@@ -103,12 +103,12 @@ void em_dev_unregister_perf_domain(struct device *dev);
 
 /**
  * em_cpu_energy() - Estimates the energy consumed by the CPUs of a
-               performance domain
+ *             performance domain
  * @pd         : performance domain for which energy has to be estimated
  * @max_util   : highest utilization among CPUs of the domain
  * @sum_util   : sum of the utilization of all CPUs in the domain
  * @allowed_cpu_cap    : maximum allowed CPU capacity for the @pd, which
-                         might reflect reduced frequency (due to thermal)
+ *                       might reflect reduced frequency (due to thermal)
  *
  * This function must be used only for CPU devices. There is no validation,
  * i.e. if the EM is a CPU type and has cpumask allocated. It is called from