1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _LINUX_PSI_TYPES_H
3 #define _LINUX_PSI_TYPES_H
5 #include <linux/kthread.h>
6 #include <linux/seqlock.h>
7 #include <linux/types.h>
8 #include <linux/kref.h>
9 #include <linux/wait.h>
13 /* Tracked task states */
19 * This can't have values other than 0 or 1 and could be
20 * implemented as a bit flag. But for now we still have room
21 * in the first cacheline of psi_group_cpu, and this way we
22 * don't have to special case any state tracking for it.
26 * For IO and CPU stalls the presence of running/oncpu tasks
27 * in the domain means a partial rather than a full stall.
28 * For memory it's not so simple because of page reclaimers:
29 * they are running/oncpu while representing a stall. To tell
30 * whether a domain has productivity left or not, we need to
31 * distinguish between regular running (i.e. productive)
32 * threads and memstall ones.
35 NR_PSI_TASK_COUNTS = 5,
38 /* Task state bitmasks */
/*
 * Each TSK_* flag is the bit corresponding to one of the per-CPU task
 * counters (1 << NR_*), so a combination of task states can be carried
 * and tested in a single integer bitmask rather than as separate counts.
 */
39 #define TSK_IOWAIT (1 << NR_IOWAIT)
40 #define TSK_MEMSTALL (1 << NR_MEMSTALL)
41 #define TSK_RUNNING (1 << NR_RUNNING)
42 #define TSK_ONCPU (1 << NR_ONCPU)
/* Distinguishes productive running threads from memstall reclaimers */
43 #define TSK_MEMSTALL_RUNNING (1 << NR_MEMSTALL_RUNNING)
45 /* Resources that workloads could be stalled on */
54 * Pressure states for each resource:
56 * SOME: Stalled tasks & working tasks
57 * FULL: Stalled tasks & no working tasks
66 /* Only per-CPU, to weigh the CPU in the global average: */
71 enum psi_aggregators {
77 struct psi_group_cpu {
78 /* 1st cacheline updated by the scheduler */
80 /* Aggregator needs to know of concurrent changes */
81 seqcount_t seq ____cacheline_aligned_in_smp;
83 /* States of the tasks belonging to this group */
84 unsigned int tasks[NR_PSI_TASK_COUNTS];
86 /* Aggregate pressure state derived from the tasks */
89 /* Period time sampling buckets for each state of interest (ns) */
90 u32 times[NR_PSI_STATES];
92 /* Time of last task change in this group (rq_clock) */
95 /* 2nd cacheline updated by the aggregator */
97 /* Delta detection against the sampling buckets */
98 u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES]
99 ____cacheline_aligned_in_smp;
102 /* PSI growth tracking window */
104 /* Window size in ns */
107 /* Start time of the current window in ns */
110 /* Value at the start of the window */
113 /* Value growth in the previous window */
118 /* PSI state being monitored by the trigger */
119 enum psi_states state;
121 /* User-specified threshold in ns */
124 /* List node inside triggers list */
125 struct list_head node;
127 /* Backpointer needed during trigger destruction */
128 struct psi_group *group;
130 /* Wait queue for polling */
131 wait_queue_head_t event_wait;
133 /* Pending event flag */
136 /* Tracking window */
137 struct psi_window win;
140 * Time last event was generated. Used for rate-limiting
141 * events to one per window
147 /* Protects data used by the aggregator */
148 struct mutex avgs_lock;
150 /* Per-cpu task state & time tracking */
151 struct psi_group_cpu __percpu *pcpu;
153 /* Running pressure averages */
154 u64 avg_total[NR_PSI_STATES - 1];
158 /* Aggregator work control */
159 struct delayed_work avgs_work;
161 /* Total stall times and sampled pressure averages */
162 u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
163 unsigned long avg[NR_PSI_STATES - 1][3];
165 /* Monitor work control */
166 struct task_struct __rcu *poll_task;
167 struct timer_list poll_timer;
168 wait_queue_head_t poll_wait;
169 atomic_t poll_wakeup;
171 /* Protects data used by the monitor */
172 struct mutex trigger_lock;
174 /* Configured polling triggers */
175 struct list_head triggers;
176 u32 nr_triggers[NR_PSI_STATES - 1];
180 /* Total stall times at the start of monitor activation */
181 u64 polling_total[NR_PSI_STATES - 1];
182 u64 polling_next_update;
186 #else /* CONFIG_PSI */
/* Empty stub so code holding a psi_group still compiles when PSI is off */
188 struct psi_group { };
190 #endif /* CONFIG_PSI */
192 #endif /* _LINUX_PSI_TYPES_H */