1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _LINUX_PSI_TYPES_H
3 #define _LINUX_PSI_TYPES_H
5 #include <linux/kthread.h>
6 #include <linux/seqlock.h>
7 #include <linux/types.h>
8 #include <linux/kref.h>
9 #include <linux/wait.h>
13 /* Tracked task states */
19 * This can't have values other than 0 or 1 and could be
20 * implemented as a bit flag. But for now we still have room
21 * in the first cacheline of psi_group_cpu, and this way we
22 * don't have to special case any state tracking for it.
26 * For IO and CPU stalls the presence of running/oncpu tasks
27 * in the domain means a partial rather than a full stall.
28 * For memory it's not so simple because of page reclaimers:
29 * they are running/oncpu while representing a stall. To tell
30 * whether a domain has productivity left or not, we need to
31 * distinguish between regular running (i.e. productive)
32 * threads and memstall ones.
35 NR_PSI_TASK_COUNTS = 5,
38 /* Task state bitmasks */
/*
 * Each TSK_* flag is the bit corresponding to one of the per-CPU task
 * counters (1 << NR_*), so a combination of task states can be carried
 * and tested in a single integer bitmask rather than as separate counts.
 */
39 #define TSK_IOWAIT (1 << NR_IOWAIT)
40 #define TSK_MEMSTALL (1 << NR_MEMSTALL)
41 #define TSK_RUNNING (1 << NR_RUNNING)
42 #define TSK_ONCPU (1 << NR_ONCPU)
/* Distinguishes productive running threads from memstall reclaimers */
43 #define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING)
45 /* Resources that workloads could be stalled on */
54 * Pressure states for each resource:
56 * SOME: Stalled tasks & working tasks
57 * FULL: Stalled tasks & no working tasks
66 /* Only per-CPU, to weigh the CPU in the global average: */
71 enum psi_aggregators {
77 struct psi_group_cpu {
78 /* 1st cacheline updated by the scheduler */
80 /* Aggregator needs to know of concurrent changes */
81 seqcount_t seq ____cacheline_aligned_in_smp;
83 /* States of the tasks belonging to this group */
84 unsigned int tasks[NR_PSI_TASK_COUNTS];
86 /* Aggregate pressure state derived from the tasks */
89 /* Period time sampling buckets for each state of interest (ns) */
90 u32 times[NR_PSI_STATES];
92 /* Time of last task change in this group (rq_clock) */
95 /* 2nd cacheline updated by the aggregator */
97 /* Delta detection against the sampling buckets */
98 u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES]
99 ____cacheline_aligned_in_smp;
102 /* PSI growth tracking window */
104 /* Window size in ns */
107 /* Start time of the current window in ns */
110 /* Value at the start of the window */
113 /* Value growth in the previous window */
118 /* PSI state being monitored by the trigger */
119 enum psi_states state;
121 /* User-specified threshold in ns */
124 /* List node inside triggers list */
125 struct list_head node;
127 /* Backpointer needed during trigger destruction */
128 struct psi_group *group;
130 /* Wait queue for polling */
131 wait_queue_head_t event_wait;
133 /* Pending event flag */
136 /* Tracking window */
137 struct psi_window win;
140 * Time last event was generated. Used for rate-limiting
141 * events to one per window
147 /* Protects data used by the aggregator */
148 struct mutex avgs_lock;
150 /* Per-cpu task state & time tracking */
151 struct psi_group_cpu __percpu *pcpu;
153 /* Running pressure averages */
154 u64 avg_total[NR_PSI_STATES - 1];
158 /* Aggregator work control */
159 struct delayed_work avgs_work;
161 /* Total stall times and sampled pressure averages */
162 u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
163 unsigned long avg[NR_PSI_STATES - 1][3];
165 /* Monitor work control */
166 struct task_struct __rcu *poll_task;
167 struct timer_list poll_timer;
168 wait_queue_head_t poll_wait;
169 atomic_t poll_wakeup;
171 /* Protects data used by the monitor */
172 struct mutex trigger_lock;
174 /* Configured polling triggers */
175 struct list_head triggers;
176 u32 nr_triggers[NR_PSI_STATES - 1];
180 /* Total stall times at the start of monitor activation */
181 u64 polling_total[NR_PSI_STATES - 1];
182 u64 polling_next_update;
186 #else /* CONFIG_PSI */
/* Empty stub so code holding a psi_group still compiles when PSI is off */
188 struct psi_group { };
190 #endif /* CONFIG_PSI */
192 #endif /* _LINUX_PSI_TYPES_H */