1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
4 #include <linux/kernel.h>
5 #include <linux/percpu.h>
6 #include <linux/slab.h>
7 #include <linux/static_key.h>
8 #include <linux/interrupt.h>
10 #include <linux/irq.h>
11 #include <linux/math64.h>
12 #include <linux/log2.h>
14 #include <trace/events/irq.h>
16 #include "internals.h"
18 DEFINE_STATIC_KEY_FALSE(irq_timing_enabled);
20 DEFINE_PER_CPU(struct irq_timings, irq_timings);
22 static DEFINE_IDR(irqt_stats);
24 void irq_timings_enable(void)
26 static_branch_enable(&irq_timing_enabled);
29 void irq_timings_disable(void)
31 static_branch_disable(&irq_timing_enabled);
35 * The main goal of this algorithm is to predict the next interrupt
36 * occurrence on the current CPU.
38 * Currently, the interrupt timings are stored in a circular array
39 * buffer every time there is an interrupt, as a tuple: the interrupt
40 * number and the associated timestamp when the event occurred <irq,
43 * For every interrupt occurring in a short period of time, we can
44 * measure the elapsed time between the occurrences for the same
45 * interrupt and we end up with a suite of intervals. The experience
46 * showed the interrupts are often coming following a periodic
49 * The objective of the algorithm is to find out this periodic pattern
50 * in the fastest way and use its period to predict the next irq event.
52 * When the next interrupt event is requested, we are in the situation
53 * where the interrupts are disabled and the circular buffer
54 * containing the timings is filled with the events which happened
55 * after the previous next-interrupt-event request.
57 * At this point, we read the circular buffer and we fill the irq
58 * related statistics structure. After this step, the circular array
59 * containing the timings is empty because all the values are
60 * dispatched in their corresponding buffers.
62 * Now for each interrupt, we can predict the next event by using the
63 * suffix array, log interval and exponential moving average
67 * Suffix array is an array of all the suffixes of a string. It is
68 * widely used as a data structure for compression, text search, ...
69 * For instance for the word 'banana', the suffixes will be: 'banana'
70 * 'anana' 'nana' 'ana' 'na' 'a'
72 * Usually, the suffix array is sorted but for our purpose it is
73 * not necessary and won't provide any improvement in the context of
74 * the solved problem where we clearly define the boundaries of the
75 * search by a max period and min period.
77 * The suffix array will build a suite of intervals of different
78 * length and will look for the repetition of each suite. If the suite
79 * is repeating then we have the period because it is the length of
80 * the suite whatever its position in the buffer.
84 * We saw the irq timings allow to compute the interval of the
85 * occurrences for a specific interrupt. We can reasonably assume the
86 * longer is the interval, the higher is the error for the next event
87 * and we can consider storing those interval values into an array
88 * where each slot in the array corresponds to an interval at the power
89 * of 2 of the index. For example, index 12 will contain values
90 * between 2^12 and 2^13 - 1, since ilog2() rounds down.
92 * At the end we have an array of values where each index defines a
93 * [2^index, 2^(index + 1) - 1] interval of values, allowing to store a
94 * large number of values inside a small array.
96 * For example, if we have the value 1123, then we store it at
97 * ilog2(1123) = 10 index value.
99 * Storing those value at the specific index is done by computing an
100 * exponential moving average for this specific slot. For instance,
101 * for values 1800, 1123, 1453, ... fall under the same slot (10) and
102 * the exponential moving average is computed every time a new value
103 * is stored at this slot.
105 * 3. Exponential Moving Average
107 * The EMA is largely used to track a signal for stocks or as a low
108 * pass filter. The magic of the formula is that it is very simple and the
109 * reactivity of the average can be tuned with the factors called
112 * The higher the alphas are, the faster the average responds to the
113 * signal change. In our case, if a slot in the array is a big
114 * interval, we can have numbers with a big difference between
115 * them. The impact of those differences in the average computation
116 * can be tuned by changing the alpha value.
119 * -- The algorithm --
121 * We saw the different processing above, now let's see how they are
124 * For each interrupt:
126 * Compute the index = ilog2(interval)
127 * Compute a new_ema(buffer[index], interval)
128 * Store the index in a circular buffer
130 * Compute the suffix array of the indexes
133 * If the suffix is reverse-found 3 times
138 * However we can not have an endless suffix array to be built, it won't
139 * make sense and it will add an extra overhead, so we can restrict
140 * this to a maximum suffix length of 5 and a minimum suffix length of
141 * 2. The experience showed 5 is the majority of the maximum pattern
142 * period found for different devices.
144 * The result is a pattern search taking less than 1us per interrupt.
146 * Example based on real values:
148 * Example 1 : MMC write/read interrupt interval:
150 * 223947, 1240, 1384, 1386, 1386,
151 * 217416, 1236, 1384, 1386, 1387,
152 * 214719, 1241, 1386, 1387, 1384,
153 * 213696, 1234, 1384, 1386, 1388,
154 * 219904, 1240, 1385, 1389, 1385,
155 * 212240, 1240, 1386, 1386, 1386,
156 * 214415, 1236, 1384, 1386, 1387,
157 * 214276, 1234, 1384, 1388, ?
159 * For each element, apply ilog2(value)
170 * Max period of 5, we take the last (max_period * 3) 15 elements as
171 * we can be confident if the pattern repeats itself three times it is
172 * a repeating pattern.
181 * 1) 8, 15, 8, 8, 8 <- max period
184 * 4) 8, 15 <- min period
186 * From there we search the repeating pattern for each suffix.
188 * buffer: 8, 15, 8, 8, 8, 8, 15, 8, 8, 8, 8, 15, 8, 8, 8
189 * | | | | | | | | | | | | | | |
190 * 8, 15, 8, 8, 8 | | | | | | | | | |
191 * 8, 15, 8, 8, 8 | | | | |
194 * When moving the suffix, we found exactly 3 matches.
196 * The first suffix with period 5 is repeating.
198 * The next event is (3 * max_period) % suffix_period
200 * In this example, the result is 0, so the next event is suffix[0] => 8
202 * However, 8 is the index in the array of exponential moving average
203 * which was calculated on the fly when storing the values, so the
204 * interval is ema[8] = 1366
244 * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4
249 * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4
250 * | | | | | | | | | | | | | | |
251 * 0, 0, 4, 0, | | | | | | | | | | |
252 * 0, 0, 4, 0, | | | | | | |
256 * Pattern is found 3 times, the remaining is 1 which results from
257 * (max_period * 3) % suffix_period. This value is the index in the
258 * suffix arrays. The suffix array for a period 4 has the value 4
/*
 * Exponential moving average weighting: the difference between a new
 * sample and the current average is multiplied by EMA_ALPHA_VAL and then
 * shifted right by EMA_ALPHA_SHIFT, i.e. scaled by 64 / 2^7 = 1/2.
 */
261 #define EMA_ALPHA_VAL 64
262 #define EMA_ALPHA_SHIFT 7
/*
 * Bounds of the pattern periods tried by the suffix search, which walks
 * from PREDICTION_PERIOD_MAX down to PREDICTION_PERIOD_MIN. A prediction
 * is only attempted when the usable maximum period is strictly greater
 * than PREDICTION_PERIOD_MIN.
 *
 * NOTE(review): the algorithm description in this file mentions a minimum
 * suffix length of 2 while this constant is 3 - confirm which is intended.
 */
264 #define PREDICTION_PERIOD_MIN 3
265 #define PREDICTION_PERIOD_MAX 5
/*
 * Divisor applied to (interval >> 10) before taking ilog2() to select the
 * slot of the exponential moving average array.
 */
266 #define PREDICTION_FACTOR 4
267 #define PREDICTION_MAX 10 /* 2 ^ PREDICTION_MAX useconds */
/* Number of entries in the per-interrupt ema_time[] array */
268 #define PREDICTION_BUFFER_SIZE 16 /* slots for EMAs, hardly more than 16 */
271 * Number of elements in the circular buffer: If it happens it was
272 * flushed before, then the number of elements could be smaller than
273 * IRQ_TIMINGS_SIZE, so the count is used, otherwise the array size is
274 * used as we wrapped. The index begins from zero when we did not
275 * wrap. That could be done in a nicer way with the proper circular
276 * array structure type but with the cost of extra computation in the
277 * interrupt handler hot path. We choose efficiency.
279 #define for_each_irqts(i, irqts) \
280 for (i = irqts->count < IRQ_TIMINGS_SIZE ? \
281 0 : irqts->count & IRQ_TIMINGS_MASK, \
282 irqts->count = min(IRQ_TIMINGS_SIZE, \
284 irqts->count > 0; irqts->count--, \
285 i = (i + 1) & IRQ_TIMINGS_MASK)
289 u64 ema_time[PREDICTION_BUFFER_SIZE];
290 int timings[IRQ_TIMINGS_SIZE];
291 int circ_timings[IRQ_TIMINGS_SIZE];
296 * Exponential moving average computation
298 static u64 irq_timings_ema_new(u64 value, u64 ema_old)
302 if (unlikely(!ema_old))
305 diff = (value - ema_old) * EMA_ALPHA_VAL;
307 * We can use a s64 type variable to be added with the u64
308 * ema_old variable as this one will never have its topmost
309 * bit set, it will be always smaller than 2^63 nanosec
310 * interrupt interval (292 years).
312 return ema_old + (diff >> EMA_ALPHA_SHIFT);
315 static int irq_timings_next_event_index(int *buffer, size_t len, int period_max)
320 * Move the beginning pointer to the end minus the max period x 3.
321 * We are at the point we can begin searching the pattern
323 buffer = &buffer[len - (period_max * 3)];
325 /* Adjust the length to the maximum allowed period x 3 */
326 len = period_max * 3;
329 * The buffer contains the suite of intervals, in a ilog2
330 * basis, we are looking for a repetition. We point the
331 * beginning of the search three times the length of the
332 * period beginning at the end of the buffer. We do that for
335 for (period = period_max; period >= PREDICTION_PERIOD_MIN; period--) {
338 * The first comparison always succeeds because the
339 * suffix is deduced from the first n-period bytes of
340 * the buffer and we compare the initial suffix with
341 * itself, so we can skip the first iteration.
344 size_t size = period;
347 * We check if the suite of length 'period' repeats
348 * itself. If it is truncated at the end, as it
349 * repeats we can use the period to find out the next
350 * element with the modulo.
352 while (!memcmp(buffer, &buffer[idx], size * sizeof(int))) {
355 * Move the index in a period basis
360 * If this condition is reached, all previous
361 * memcmp were successful, so the period is
365 return buffer[len % period];
368 * If the remaining elements to compare are
369 * smaller than the period, readjust the size
370 * of the comparison for the last iteration.
372 if (len - idx < period)
380 static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now)
382 int index, i, period_max, count, start, min = INT_MAX;
384 if ((now - irqs->last_ts) >= NSEC_PER_SEC) {
385 irqs->count = irqs->last_ts = 0;
390 * As we want to find three times the repetition, we need a
391 * number of intervals greater or equal to three times the
392 * maximum period, otherwise we truncate the max period.
394 period_max = irqs->count > (3 * PREDICTION_PERIOD_MAX) ?
395 PREDICTION_PERIOD_MAX : irqs->count / 3;
398 * If we don't have enough irq timings for this prediction,
401 if (period_max <= PREDICTION_PERIOD_MIN)
405 * 'count' depends on whether the circular buffer wrapped or not
407 count = irqs->count < IRQ_TIMINGS_SIZE ?
408 irqs->count : IRQ_TIMINGS_SIZE;
410 start = irqs->count < IRQ_TIMINGS_SIZE ?
411 0 : (irqs->count & IRQ_TIMINGS_MASK);
414 * Copy the content of the circular buffer into another buffer
415 * in order to linearize the buffer instead of dealing with
416 * wrapping indexes and shifted array which will be prone to
417 * error and extremely difficult to debug.
419 for (i = 0; i < count; i++) {
420 int index = (start + i) & IRQ_TIMINGS_MASK;
422 irqs->timings[i] = irqs->circ_timings[index];
423 min = min_t(int, irqs->timings[i], min);
426 index = irq_timings_next_event_index(irqs->timings, count, period_max);
428 return irqs->last_ts + irqs->ema_time[min];
430 return irqs->last_ts + irqs->ema_time[index];
433 static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts)
435 u64 old_ts = irqs->last_ts;
440 * The timestamps are absolute time values, we need to compute
441 * the timing interval between two interrupts.
446 * The interval type is u64 in order to deal with the same
447 * type in our computation, that prevents subtle issues with
448 * overflow, sign and division.
450 interval = ts - old_ts;
453 * The interrupt triggered more than one second apart, that
454 * ends the sequence as predictable for our purpose. In this
455 * case, assume we have the beginning of a sequence and the
456 * timestamp is the first value. As it is impossible to
457 * predict anything at this point, return.
459 * Note the first timestamp of the sequence will always fall
460 * in this test because the old_ts is zero. That is what we
461 * want as we need another timestamp to compute an interval.
463 if (interval >= NSEC_PER_SEC) {
469 * Get the index in the ema table for this interrupt. The
470 * PREDICTION_FACTOR increases the interval size for the array
471 * of exponential average.
473 index = likely(interval) ?
474 ilog2((interval >> 10) / PREDICTION_FACTOR) : 0;
477 * Store the index as an element of the pattern in another
480 irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index;
482 irqs->ema_time[index] = irq_timings_ema_new(interval,
483 irqs->ema_time[index]);
489 * irq_timings_next_event - Return when the next event is supposed to arrive
491 * During the last busy cycle, the number of interrupts is incremented
492 * and stored in the irq_timings structure. This information is
495 * - know if the index in the table wrapped up:
497 * If more than the array size interrupts happened during the
498 * last busy/idle cycle, the index wrapped up and we have to
499 * begin with the next element in the array which is the last one
500 * in the sequence, otherwise it is at index 0.
502 * - have an indication of the interrupts activity on this CPU
505 * The values are 'consumed' after inserting in the statistical model,
506 * thus the count is reinitialized.
508 * The array of values **must** be browsed in the time direction, the
509 * timestamp must increase between an element and the next one.
511 * Returns a nanosec time based estimation of the earliest interrupt,
514 u64 irq_timings_next_event(u64 now)
516 struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
517 struct irqt_stat *irqs;
518 struct irqt_stat __percpu *s;
519 u64 ts, next_evt = U64_MAX;
523 * This function must be called with the local irq disabled in
524 * order to prevent the timings circular buffer from being updated
525 * while we are reading it.
527 lockdep_assert_irqs_disabled();
533 * Number of elements in the circular buffer: If it happens it
534 * was flushed before, then the number of elements could be
535 * smaller than IRQ_TIMINGS_SIZE, so the count is used,
536 * otherwise the array size is used as we wrapped. The index
537 * begins from zero when we did not wrap. That could be done
538 * in a nicer way with the proper circular array structure
539 * type but with the cost of extra computation in the
540 * interrupt handler hot path. We choose efficiency.
542 * Inject measured irq/timestamp to the pattern prediction
543 * model while decrementing the counter because we consume the
544 * data from our circular buffer.
546 for_each_irqts(i, irqts) {
547 irq = irq_timing_decode(irqts->values[i], &ts);
548 s = idr_find(&irqt_stats, irq);
550 irq_timings_store(irq, this_cpu_ptr(s), ts);
554 * Look in the list of interrupts' statistics, the earliest
557 idr_for_each_entry(&irqt_stats, s, i) {
559 irqs = this_cpu_ptr(s);
561 ts = __irq_timings_next_event(irqs, i, now);
572 void irq_timings_free(int irq)
574 struct irqt_stat __percpu *s;
576 s = idr_find(&irqt_stats, irq);
579 idr_remove(&irqt_stats, irq);
583 int irq_timings_alloc(int irq)
585 struct irqt_stat __percpu *s;
589 * Some platforms can have the same private interrupt per cpu,
590 * so this function may be called several times with the
591 * same interrupt number. Just bail out in case the per cpu
592 * stat structure is already allocated.
594 s = idr_find(&irqt_stats, irq);
598 s = alloc_percpu(*s);
602 idr_preload(GFP_KERNEL);
603 id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT);