Merge branches 'pm-opp' and 'pm-cpufreq'
[linux-2.6-microblaze.git] / kernel / trace / trace_osnoise.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * OS Noise Tracer: computes the OS Noise suffered by a running thread.
4  * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread.
5  *
6  * Based on "hwlat_detector" tracer by:
7  *   Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
8  *   Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
9  *   With feedback from Clark Williams <williams@redhat.com>
10  *
11  * And also based on the rtsl tracer presented on:
12  *  DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux
13  *  scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems
14  *  (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020.
15  *
16  * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com>
17  */
18
19 #include <linux/kthread.h>
20 #include <linux/tracefs.h>
21 #include <linux/uaccess.h>
22 #include <linux/cpumask.h>
23 #include <linux/delay.h>
24 #include <linux/sched/clock.h>
25 #include <uapi/linux/sched/types.h>
26 #include <linux/sched.h>
27 #include "trace.h"
28
29 #ifdef CONFIG_X86_LOCAL_APIC
30 #include <asm/trace/irq_vectors.h>
31 #undef TRACE_INCLUDE_PATH
32 #undef TRACE_INCLUDE_FILE
33 #endif /* CONFIG_X86_LOCAL_APIC */
34
35 #include <trace/events/irq.h>
36 #include <trace/events/sched.h>
37
38 #define CREATE_TRACE_POINTS
39 #include <trace/events/osnoise.h>
40
41 static struct trace_array       *osnoise_trace;
42
43 /*
44  * Default values.
45  */
46 #define BANNER                  "osnoise: "
47 #define DEFAULT_SAMPLE_PERIOD   1000000                 /* 1s */
48 #define DEFAULT_SAMPLE_RUNTIME  1000000                 /* 1s */
49
50 #define DEFAULT_TIMERLAT_PERIOD 1000                    /* 1ms */
51 #define DEFAULT_TIMERLAT_PRIO   95                      /* FIFO 95 */
52
53 /*
54  * NMI runtime info.
55  */
56 struct osn_nmi {
57         u64     count;
58         u64     delta_start;
59 };
60
61 /*
62  * IRQ runtime info.
63  */
64 struct osn_irq {
65         u64     count;
66         u64     arrival_time;
67         u64     delta_start;
68 };
69
70 #define IRQ_CONTEXT     0
71 #define THREAD_CONTEXT  1
72 /*
73  * sofirq runtime info.
74  */
75 struct osn_softirq {
76         u64     count;
77         u64     arrival_time;
78         u64     delta_start;
79 };
80
81 /*
82  * thread runtime info.
83  */
84 struct osn_thread {
85         u64     count;
86         u64     arrival_time;
87         u64     delta_start;
88 };
89
90 /*
91  * Runtime information: this structure saves the runtime information used by
92  * one sampling thread.
93  */
94 struct osnoise_variables {
95         struct task_struct      *kthread;
96         bool                    sampling;
97         pid_t                   pid;
98         struct osn_nmi          nmi;
99         struct osn_irq          irq;
100         struct osn_softirq      softirq;
101         struct osn_thread       thread;
102         local_t                 int_counter;
103 };
104
105 /*
106  * Per-cpu runtime information.
107  */
108 DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var);
109
110 /*
111  * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU
112  */
113 static inline struct osnoise_variables *this_cpu_osn_var(void)
114 {
115         return this_cpu_ptr(&per_cpu_osnoise_var);
116 }
117
118 #ifdef CONFIG_TIMERLAT_TRACER
119 /*
120  * Runtime information for the timer mode.
121  */
122 struct timerlat_variables {
123         struct task_struct      *kthread;
124         struct hrtimer          timer;
125         u64                     rel_period;
126         u64                     abs_period;
127         bool                    tracing_thread;
128         u64                     count;
129 };
130
131 DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);
132
133 /*
134  * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU
135  */
136 static inline struct timerlat_variables *this_cpu_tmr_var(void)
137 {
138         return this_cpu_ptr(&per_cpu_timerlat_var);
139 }
140
141 /*
142  * tlat_var_reset - Reset the values of the given timerlat_variables
143  */
144 static inline void tlat_var_reset(void)
145 {
146         struct timerlat_variables *tlat_var;
147         int cpu;
148         /*
149          * So far, all the values are initialized as 0, so
150          * zeroing the structure is perfect.
151          */
152         for_each_cpu(cpu, cpu_online_mask) {
153                 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
154                 memset(tlat_var, 0, sizeof(*tlat_var));
155         }
156 }
157 #else /* CONFIG_TIMERLAT_TRACER */
158 #define tlat_var_reset()        do {} while (0)
159 #endif /* CONFIG_TIMERLAT_TRACER */
160
161 /*
162  * osn_var_reset - Reset the values of the given osnoise_variables
163  */
164 static inline void osn_var_reset(void)
165 {
166         struct osnoise_variables *osn_var;
167         int cpu;
168
169         /*
170          * So far, all the values are initialized as 0, so
171          * zeroing the structure is perfect.
172          */
173         for_each_cpu(cpu, cpu_online_mask) {
174                 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
175                 memset(osn_var, 0, sizeof(*osn_var));
176         }
177 }
178
179 /*
180  * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables
181  */
182 static inline void osn_var_reset_all(void)
183 {
184         osn_var_reset();
185         tlat_var_reset();
186 }
187
188 /*
189  * Tells NMIs to call back to the osnoise tracer to record timestamps.
190  */
191 bool trace_osnoise_callback_enabled;
192
193 /*
194  * osnoise sample structure definition. Used to store the statistics of a
195  * sample run.
196  */
197 struct osnoise_sample {
198         u64                     runtime;        /* runtime */
199         u64                     noise;          /* noise */
200         u64                     max_sample;     /* max single noise sample */
201         int                     hw_count;       /* # HW (incl. hypervisor) interference */
202         int                     nmi_count;      /* # NMIs during this sample */
203         int                     irq_count;      /* # IRQs during this sample */
204         int                     softirq_count;  /* # softirqs during this sample */
205         int                     thread_count;   /* # threads during this sample */
206 };
207
208 #ifdef CONFIG_TIMERLAT_TRACER
209 /*
210  * timerlat sample structure definition. Used to store the statistics of
211  * a sample run.
212  */
213 struct timerlat_sample {
214         u64                     timer_latency;  /* timer_latency */
215         unsigned int            seqnum;         /* unique sequence */
216         int                     context;        /* timer context */
217 };
218 #endif
219
220 /*
221  * Protect the interface.
222  */
223 struct mutex interface_lock;
224
225 /*
226  * Tracer data.
227  */
228 static struct osnoise_data {
229         u64     sample_period;          /* total sampling period */
230         u64     sample_runtime;         /* active sampling portion of period */
231         u64     stop_tracing;           /* stop trace in the internal operation (loop/irq) */
232         u64     stop_tracing_total;     /* stop trace in the final operation (report/thread) */
233 #ifdef CONFIG_TIMERLAT_TRACER
234         u64     timerlat_period;        /* timerlat period */
235         u64     print_stack;            /* print IRQ stack if total > */
236         int     timerlat_tracer;        /* timerlat tracer */
237 #endif
238         bool    tainted;                /* infor users and developers about a problem */
239 } osnoise_data = {
240         .sample_period                  = DEFAULT_SAMPLE_PERIOD,
241         .sample_runtime                 = DEFAULT_SAMPLE_RUNTIME,
242         .stop_tracing                   = 0,
243         .stop_tracing_total             = 0,
244 #ifdef CONFIG_TIMERLAT_TRACER
245         .print_stack                    = 0,
246         .timerlat_period                = DEFAULT_TIMERLAT_PERIOD,
247         .timerlat_tracer                = 0,
248 #endif
249 };
250
251 /*
252  * Boolean variable used to inform that the tracer is currently sampling.
253  */
254 static bool osnoise_busy;
255
256 #ifdef CONFIG_PREEMPT_RT
257 /*
258  * Print the osnoise header info.
259  */
260 static void print_osnoise_headers(struct seq_file *s)
261 {
262         if (osnoise_data.tainted)
263                 seq_puts(s, "# osnoise is tainted!\n");
264
265         seq_puts(s, "#                                _-------=> irqs-off\n");
266         seq_puts(s, "#                               / _------=> need-resched\n");
267         seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
268         seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
269         seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
270         seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
271         seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");
272
273         seq_puts(s, "#                              |||||| /          ");
274         seq_puts(s, "                                     MAX\n");
275
276         seq_puts(s, "#                              ||||| /                         ");
277         seq_puts(s, "                    SINGLE      Interference counters:\n");
278
279         seq_puts(s, "#                              |||||||               RUNTIME   ");
280         seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");
281
282         seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    IN US    ");
283         seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");
284
285         seq_puts(s, "#              | |         |   |||||||      |           |      ");
286         seq_puts(s, "       |    |            |      |      |      |      |      |\n");
287 }
288 #else /* CONFIG_PREEMPT_RT */
289 static void print_osnoise_headers(struct seq_file *s)
290 {
291         if (osnoise_data.tainted)
292                 seq_puts(s, "# osnoise is tainted!\n");
293
294         seq_puts(s, "#                                _-----=> irqs-off\n");
295         seq_puts(s, "#                               / _----=> need-resched\n");
296         seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
297         seq_puts(s, "#                              || / _--=> preempt-depth\n");
298         seq_puts(s, "#                              ||| / _-=> migrate-disable     ");
299         seq_puts(s, "                    MAX\n");
300         seq_puts(s, "#                              |||| /     delay               ");
301         seq_puts(s, "                    SINGLE      Interference counters:\n");
302
303         seq_puts(s, "#                              |||||               RUNTIME   ");
304         seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");
305
306         seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP    IN US    ");
307         seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");
308
309         seq_puts(s, "#              | |         |   |||||      |           |      ");
310         seq_puts(s, "       |    |            |      |      |      |      |      |\n");
311 }
312 #endif /* CONFIG_PREEMPT_RT */
313
314 /*
315  * osnoise_taint - report an osnoise error.
316  */
317 #define osnoise_taint(msg) ({                                                   \
318         struct trace_array *tr = osnoise_trace;                                 \
319                                                                                 \
320         trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, msg);        \
321         osnoise_data.tainted = true;                                            \
322 })
323
324 /*
325  * Record an osnoise_sample into the tracer buffer.
326  */
327 static void trace_osnoise_sample(struct osnoise_sample *sample)
328 {
329         struct trace_array *tr = osnoise_trace;
330         struct trace_buffer *buffer = tr->array_buffer.buffer;
331         struct trace_event_call *call = &event_osnoise;
332         struct ring_buffer_event *event;
333         struct osnoise_entry *entry;
334
335         event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry),
336                                           tracing_gen_ctx());
337         if (!event)
338                 return;
339         entry   = ring_buffer_event_data(event);
340         entry->runtime          = sample->runtime;
341         entry->noise            = sample->noise;
342         entry->max_sample       = sample->max_sample;
343         entry->hw_count         = sample->hw_count;
344         entry->nmi_count        = sample->nmi_count;
345         entry->irq_count        = sample->irq_count;
346         entry->softirq_count    = sample->softirq_count;
347         entry->thread_count     = sample->thread_count;
348
349         if (!call_filter_check_discard(call, entry, buffer, event))
350                 trace_buffer_unlock_commit_nostack(buffer, event);
351 }
352
353 #ifdef CONFIG_TIMERLAT_TRACER
354 /*
355  * Print the timerlat header info.
356  */
357 #ifdef CONFIG_PREEMPT_RT
358 static void print_timerlat_headers(struct seq_file *s)
359 {
360         seq_puts(s, "#                                _-------=> irqs-off\n");
361         seq_puts(s, "#                               / _------=> need-resched\n");
362         seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
363         seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
364         seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
365         seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
366         seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");
367         seq_puts(s, "#                              |||||| /\n");
368         seq_puts(s, "#                              |||||||             ACTIVATION\n");
369         seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    ID     ");
370         seq_puts(s, "       CONTEXT                LATENCY\n");
371         seq_puts(s, "#              | |         |   |||||||      |         |      ");
372         seq_puts(s, "            |                       |\n");
373 }
374 #else /* CONFIG_PREEMPT_RT */
375 static void print_timerlat_headers(struct seq_file *s)
376 {
377         seq_puts(s, "#                                _-----=> irqs-off\n");
378         seq_puts(s, "#                               / _----=> need-resched\n");
379         seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
380         seq_puts(s, "#                              || / _--=> preempt-depth\n");
381         seq_puts(s, "#                              ||| / _-=> migrate-disable\n");
382         seq_puts(s, "#                              |||| /     delay\n");
383         seq_puts(s, "#                              |||||            ACTIVATION\n");
384         seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP   ID      ");
385         seq_puts(s, "      CONTEXT                 LATENCY\n");
386         seq_puts(s, "#              | |         |   |||||      |         |      ");
387         seq_puts(s, "            |                       |\n");
388 }
389 #endif /* CONFIG_PREEMPT_RT */
390
391 /*
392  * Record an timerlat_sample into the tracer buffer.
393  */
394 static void trace_timerlat_sample(struct timerlat_sample *sample)
395 {
396         struct trace_array *tr = osnoise_trace;
397         struct trace_event_call *call = &event_osnoise;
398         struct trace_buffer *buffer = tr->array_buffer.buffer;
399         struct ring_buffer_event *event;
400         struct timerlat_entry *entry;
401
402         event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry),
403                                           tracing_gen_ctx());
404         if (!event)
405                 return;
406         entry   = ring_buffer_event_data(event);
407         entry->seqnum                   = sample->seqnum;
408         entry->context                  = sample->context;
409         entry->timer_latency            = sample->timer_latency;
410
411         if (!call_filter_check_discard(call, entry, buffer, event))
412                 trace_buffer_unlock_commit_nostack(buffer, event);
413 }
414
415 #ifdef CONFIG_STACKTRACE
416
417 #define MAX_CALLS       256
418
419 /*
420  * Stack trace will take place only at IRQ level, so, no need
421  * to control nesting here.
422  */
423 struct trace_stack {
424         int             stack_size;
425         int             nr_entries;
426         unsigned long   calls[MAX_CALLS];
427 };
428
429 static DEFINE_PER_CPU(struct trace_stack, trace_stack);
430
431 /*
432  * timerlat_save_stack - save a stack trace without printing
433  *
434  * Save the current stack trace without printing. The
435  * stack will be printed later, after the end of the measurement.
436  */
437 static void timerlat_save_stack(int skip)
438 {
439         unsigned int size, nr_entries;
440         struct trace_stack *fstack;
441
442         fstack = this_cpu_ptr(&trace_stack);
443
444         size = ARRAY_SIZE(fstack->calls);
445
446         nr_entries = stack_trace_save(fstack->calls, size, skip);
447
448         fstack->stack_size = nr_entries * sizeof(unsigned long);
449         fstack->nr_entries = nr_entries;
450
451         return;
452
453 }
454 /*
455  * timerlat_dump_stack - dump a stack trace previously saved
456  *
457  * Dump a saved stack trace into the trace buffer.
458  */
459 static void timerlat_dump_stack(void)
460 {
461         struct trace_event_call *call = &event_osnoise;
462         struct trace_array *tr = osnoise_trace;
463         struct trace_buffer *buffer = tr->array_buffer.buffer;
464         struct ring_buffer_event *event;
465         struct trace_stack *fstack;
466         struct stack_entry *entry;
467         unsigned int size;
468
469         preempt_disable_notrace();
470         fstack = this_cpu_ptr(&trace_stack);
471         size = fstack->stack_size;
472
473         event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size,
474                                           tracing_gen_ctx());
475         if (!event)
476                 goto out;
477
478         entry = ring_buffer_event_data(event);
479
480         memcpy(&entry->caller, fstack->calls, size);
481         entry->size = fstack->nr_entries;
482
483         if (!call_filter_check_discard(call, entry, buffer, event))
484                 trace_buffer_unlock_commit_nostack(buffer, event);
485
486 out:
487         preempt_enable_notrace();
488 }
489 #else
490 #define timerlat_dump_stack() do {} while (0)
491 #define timerlat_save_stack(a) do {} while (0)
492 #endif /* CONFIG_STACKTRACE */
493 #endif /* CONFIG_TIMERLAT_TRACER */
494
495 /*
496  * Macros to encapsulate the time capturing infrastructure.
497  */
498 #define time_get()      trace_clock_local()
499 #define time_to_us(x)   div_u64(x, 1000)
500 #define time_sub(a, b)  ((a) - (b))
501
502 /*
503  * cond_move_irq_delta_start - Forward the delta_start of a running IRQ
504  *
505  * If an IRQ is preempted by an NMI, its delta_start is pushed forward
506  * to discount the NMI interference.
507  *
508  * See get_int_safe_duration().
509  */
510 static inline void
511 cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration)
512 {
513         if (osn_var->irq.delta_start)
514                 osn_var->irq.delta_start += duration;
515 }
516
517 #ifndef CONFIG_PREEMPT_RT
518 /*
519  * cond_move_softirq_delta_start - Forward the delta_start of a running softirq.
520  *
521  * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed
522  * forward to discount the interference.
523  *
524  * See get_int_safe_duration().
525  */
526 static inline void
527 cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration)
528 {
529         if (osn_var->softirq.delta_start)
530                 osn_var->softirq.delta_start += duration;
531 }
532 #else /* CONFIG_PREEMPT_RT */
533 #define cond_move_softirq_delta_start(osn_var, duration) do {} while (0)
534 #endif
535
536 /*
537  * cond_move_thread_delta_start - Forward the delta_start of a running thread
538  *
539  * If a noisy thread is preempted by an softirq, IRQ or NMI, its delta_start
540  * is pushed forward to discount the interference.
541  *
542  * See get_int_safe_duration().
543  */
544 static inline void
545 cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration)
546 {
547         if (osn_var->thread.delta_start)
548                 osn_var->thread.delta_start += duration;
549 }
550
551 /*
552  * get_int_safe_duration - Get the duration of a window
553  *
554  * The irq, softirq and thread varaibles need to have its duration without
555  * the interference from higher priority interrupts. Instead of keeping a
556  * variable to discount the interrupt interference from these variables, the
557  * starting time of these variables are pushed forward with the interrupt's
558  * duration. In this way, a single variable is used to:
559  *
560  *   - Know if a given window is being measured.
561  *   - Account its duration.
562  *   - Discount the interference.
563  *
564  * To avoid getting inconsistent values, e.g.,:
565  *
566  *      now = time_get()
567  *              --->    interrupt!
568  *                      delta_start -= int duration;
569  *              <---
570  *      duration = now - delta_start;
571  *
572  *      result: negative duration if the variable duration before the
573  *      interrupt was smaller than the interrupt execution.
574  *
575  * A counter of interrupts is used. If the counter increased, try
576  * to capture an interference safe duration.
577  */
578 static inline s64
579 get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start)
580 {
581         u64 int_counter, now;
582         s64 duration;
583
584         do {
585                 int_counter = local_read(&osn_var->int_counter);
586                 /* synchronize with interrupts */
587                 barrier();
588
589                 now = time_get();
590                 duration = (now - *delta_start);
591
592                 /* synchronize with interrupts */
593                 barrier();
594         } while (int_counter != local_read(&osn_var->int_counter));
595
596         /*
597          * This is an evidence of race conditions that cause
598          * a value to be "discounted" too much.
599          */
600         if (duration < 0)
601                 osnoise_taint("Negative duration!\n");
602
603         *delta_start = 0;
604
605         return duration;
606 }
607
608 /*
609  *
610  * set_int_safe_time - Save the current time on *time, aware of interference
611  *
612  * Get the time, taking into consideration a possible interference from
613  * higher priority interrupts.
614  *
615  * See get_int_safe_duration() for an explanation.
616  */
617 static u64
618 set_int_safe_time(struct osnoise_variables *osn_var, u64 *time)
619 {
620         u64 int_counter;
621
622         do {
623                 int_counter = local_read(&osn_var->int_counter);
624                 /* synchronize with interrupts */
625                 barrier();
626
627                 *time = time_get();
628
629                 /* synchronize with interrupts */
630                 barrier();
631         } while (int_counter != local_read(&osn_var->int_counter));
632
633         return int_counter;
634 }
635
636 #ifdef CONFIG_TIMERLAT_TRACER
637 /*
638  * copy_int_safe_time - Copy *src into *desc aware of interference
639  */
640 static u64
641 copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src)
642 {
643         u64 int_counter;
644
645         do {
646                 int_counter = local_read(&osn_var->int_counter);
647                 /* synchronize with interrupts */
648                 barrier();
649
650                 *dst = *src;
651
652                 /* synchronize with interrupts */
653                 barrier();
654         } while (int_counter != local_read(&osn_var->int_counter));
655
656         return int_counter;
657 }
658 #endif /* CONFIG_TIMERLAT_TRACER */
659
660 /*
661  * trace_osnoise_callback - NMI entry/exit callback
662  *
663  * This function is called at the entry and exit NMI code. The bool enter
664  * distinguishes between either case. This function is used to note a NMI
665  * occurrence, compute the noise caused by the NMI, and to remove the noise
666  * it is potentially causing on other interference variables.
667  */
668 void trace_osnoise_callback(bool enter)
669 {
670         struct osnoise_variables *osn_var = this_cpu_osn_var();
671         u64 duration;
672
673         if (!osn_var->sampling)
674                 return;
675
676         /*
677          * Currently trace_clock_local() calls sched_clock() and the
678          * generic version is not NMI safe.
679          */
680         if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
681                 if (enter) {
682                         osn_var->nmi.delta_start = time_get();
683                         local_inc(&osn_var->int_counter);
684                 } else {
685                         duration = time_get() - osn_var->nmi.delta_start;
686
687                         trace_nmi_noise(osn_var->nmi.delta_start, duration);
688
689                         cond_move_irq_delta_start(osn_var, duration);
690                         cond_move_softirq_delta_start(osn_var, duration);
691                         cond_move_thread_delta_start(osn_var, duration);
692                 }
693         }
694
695         if (enter)
696                 osn_var->nmi.count++;
697 }
698
699 /*
700  * osnoise_trace_irq_entry - Note the starting of an IRQ
701  *
702  * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs,
703  * it is safe to use a single variable (ons_var->irq) to save the statistics.
704  * The arrival_time is used to report... the arrival time. The delta_start
705  * is used to compute the duration at the IRQ exit handler. See
706  * cond_move_irq_delta_start().
707  */
708 void osnoise_trace_irq_entry(int id)
709 {
710         struct osnoise_variables *osn_var = this_cpu_osn_var();
711
712         if (!osn_var->sampling)
713                 return;
714         /*
715          * This value will be used in the report, but not to compute
716          * the execution time, so it is safe to get it unsafe.
717          */
718         osn_var->irq.arrival_time = time_get();
719         set_int_safe_time(osn_var, &osn_var->irq.delta_start);
720         osn_var->irq.count++;
721
722         local_inc(&osn_var->int_counter);
723 }
724
725 /*
726  * osnoise_irq_exit - Note the end of an IRQ, sava data and trace
727  *
728  * Computes the duration of the IRQ noise, and trace it. Also discounts the
729  * interference from other sources of noise could be currently being accounted.
730  */
731 void osnoise_trace_irq_exit(int id, const char *desc)
732 {
733         struct osnoise_variables *osn_var = this_cpu_osn_var();
734         int duration;
735
736         if (!osn_var->sampling)
737                 return;
738
739         duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start);
740         trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration);
741         osn_var->irq.arrival_time = 0;
742         cond_move_softirq_delta_start(osn_var, duration);
743         cond_move_thread_delta_start(osn_var, duration);
744 }
745
746 /*
747  * trace_irqentry_callback - Callback to the irq:irq_entry traceevent
748  *
749  * Used to note the starting of an IRQ occurece.
750  */
751 static void trace_irqentry_callback(void *data, int irq,
752                                     struct irqaction *action)
753 {
754         osnoise_trace_irq_entry(irq);
755 }
756
757 /*
758  * trace_irqexit_callback - Callback to the irq:irq_exit traceevent
759  *
760  * Used to note the end of an IRQ occurece.
761  */
762 static void trace_irqexit_callback(void *data, int irq,
763                                    struct irqaction *action, int ret)
764 {
765         osnoise_trace_irq_exit(irq, action->name);
766 }
767
768 /*
769  * arch specific register function.
770  */
771 int __weak osnoise_arch_register(void)
772 {
773         return 0;
774 }
775
776 /*
777  * arch specific unregister function.
778  */
779 void __weak osnoise_arch_unregister(void)
780 {
781         return;
782 }
783
784 /*
785  * hook_irq_events - Hook IRQ handling events
786  *
787  * This function hooks the IRQ related callbacks to the respective trace
788  * events.
789  */
790 static int hook_irq_events(void)
791 {
792         int ret;
793
794         ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL);
795         if (ret)
796                 goto out_err;
797
798         ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL);
799         if (ret)
800                 goto out_unregister_entry;
801
802         ret = osnoise_arch_register();
803         if (ret)
804                 goto out_irq_exit;
805
806         return 0;
807
808 out_irq_exit:
809         unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
810 out_unregister_entry:
811         unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
812 out_err:
813         return -EINVAL;
814 }
815
816 /*
817  * unhook_irq_events - Unhook IRQ handling events
818  *
819  * This function unhooks the IRQ related callbacks to the respective trace
820  * events.
821  */
822 static void unhook_irq_events(void)
823 {
824         osnoise_arch_unregister();
825         unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
826         unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
827 }
828
829 #ifndef CONFIG_PREEMPT_RT
830 /*
831  * trace_softirq_entry_callback - Note the starting of a softirq
832  *
833  * Save the starting time of a softirq. As softirqs are non-preemptive to
834  * other softirqs, it is safe to use a single variable (ons_var->softirq)
835  * to save the statistics. The arrival_time is used to report... the
836  * arrival time. The delta_start is used to compute the duration at the
837  * softirq exit handler. See cond_move_softirq_delta_start().
838  */
839 static void trace_softirq_entry_callback(void *data, unsigned int vec_nr)
840 {
841         struct osnoise_variables *osn_var = this_cpu_osn_var();
842
843         if (!osn_var->sampling)
844                 return;
845         /*
846          * This value will be used in the report, but not to compute
847          * the execution time, so it is safe to get it unsafe.
848          */
849         osn_var->softirq.arrival_time = time_get();
850         set_int_safe_time(osn_var, &osn_var->softirq.delta_start);
851         osn_var->softirq.count++;
852
853         local_inc(&osn_var->int_counter);
854 }
855
856 /*
857  * trace_softirq_exit_callback - Note the end of an softirq
858  *
859  * Computes the duration of the softirq noise, and trace it. Also discounts the
860  * interference from other sources of noise could be currently being accounted.
861  */
862 static void trace_softirq_exit_callback(void *data, unsigned int vec_nr)
863 {
864         struct osnoise_variables *osn_var = this_cpu_osn_var();
865         int duration;
866
867         if (!osn_var->sampling)
868                 return;
869
870 #ifdef CONFIG_TIMERLAT_TRACER
871         /*
872          * If the timerlat is enabled, but the irq handler did
873          * not run yet enabling timerlat_tracer, do not trace.
874          */
875         if (unlikely(osnoise_data.timerlat_tracer)) {
876                 struct timerlat_variables *tlat_var;
877                 tlat_var = this_cpu_tmr_var();
878                 if (!tlat_var->tracing_thread) {
879                         osn_var->softirq.arrival_time = 0;
880                         osn_var->softirq.delta_start = 0;
881                         return;
882                 }
883         }
884 #endif
885
886         duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start);
887         trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration);
888         cond_move_thread_delta_start(osn_var, duration);
889         osn_var->softirq.arrival_time = 0;
890 }
891
892 /*
893  * hook_softirq_events - Hook softirq handling events
894  *
895  * This function hooks the softirq related callbacks to the respective trace
896  * events.
897  */
898 static int hook_softirq_events(void)
899 {
900         int ret;
901
902         ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL);
903         if (ret)
904                 goto out_err;
905
906         ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL);
907         if (ret)
908                 goto out_unreg_entry;
909
910         return 0;
911
912 out_unreg_entry:
913         unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
914 out_err:
915         return -EINVAL;
916 }
917
918 /*
919  * unhook_softirq_events - Unhook softirq handling events
920  *
921  * This function hooks the softirq related callbacks to the respective trace
922  * events.
923  */
924 static void unhook_softirq_events(void)
925 {
926         unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
927         unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL);
928 }
929 #else /* CONFIG_PREEMPT_RT */
930 /*
931  * softirq are threads on the PREEMPT_RT mode.
932  */
933 static int hook_softirq_events(void)
934 {
935         return 0;
936 }
937 static void unhook_softirq_events(void)
938 {
939 }
940 #endif
941
942 /*
943  * thread_entry - Record the starting of a thread noise window
944  *
945  * It saves the context switch time for a noisy thread, and increments
946  * the interference counters.
947  */
948 static void
949 thread_entry(struct osnoise_variables *osn_var, struct task_struct *t)
950 {
951         if (!osn_var->sampling)
952                 return;
953         /*
954          * The arrival time will be used in the report, but not to compute
955          * the execution time, so it is safe to get it unsafe.
956          */
957         osn_var->thread.arrival_time = time_get();
958
959         set_int_safe_time(osn_var, &osn_var->thread.delta_start);
960
961         osn_var->thread.count++;
962         local_inc(&osn_var->int_counter);
963 }
964
965 /*
966  * thread_exit - Report the end of a thread noise window
967  *
968  * It computes the total noise from a thread, tracing if needed.
969  */
970 static void
971 thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
972 {
973         int duration;
974
975         if (!osn_var->sampling)
976                 return;
977
978 #ifdef CONFIG_TIMERLAT_TRACER
979         if (osnoise_data.timerlat_tracer) {
980                 struct timerlat_variables *tlat_var;
981                 tlat_var = this_cpu_tmr_var();
982                 if (!tlat_var->tracing_thread) {
983                         osn_var->thread.delta_start = 0;
984                         osn_var->thread.arrival_time = 0;
985                         return;
986                 }
987         }
988 #endif
989
990         duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start);
991
992         trace_thread_noise(t, osn_var->thread.arrival_time, duration);
993
994         osn_var->thread.arrival_time = 0;
995 }
996
997 /*
998  * trace_sched_switch - sched:sched_switch trace event handler
999  *
1000  * This function is hooked to the sched:sched_switch trace event, and it is
1001  * used to record the beginning and to report the end of a thread noise window.
1002  */
1003 static void
1004 trace_sched_switch_callback(void *data, bool preempt, struct task_struct *p,
1005                             struct task_struct *n)
1006 {
1007         struct osnoise_variables *osn_var = this_cpu_osn_var();
1008
1009         if (p->pid != osn_var->pid)
1010                 thread_exit(osn_var, p);
1011
1012         if (n->pid != osn_var->pid)
1013                 thread_entry(osn_var, n);
1014 }
1015
1016 /*
1017  * hook_thread_events - Hook the insturmentation for thread noise
1018  *
1019  * Hook the osnoise tracer callbacks to handle the noise from other
1020  * threads on the necessary kernel events.
1021  */
1022 static int hook_thread_events(void)
1023 {
1024         int ret;
1025
1026         ret = register_trace_sched_switch(trace_sched_switch_callback, NULL);
1027         if (ret)
1028                 return -EINVAL;
1029
1030         return 0;
1031 }
1032
1033 /*
1034  * unhook_thread_events - *nhook the insturmentation for thread noise
1035  *
1036  * Unook the osnoise tracer callbacks to handle the noise from other
1037  * threads on the necessary kernel events.
1038  */
1039 static void unhook_thread_events(void)
1040 {
1041         unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
1042 }
1043
1044 /*
1045  * save_osn_sample_stats - Save the osnoise_sample statistics
1046  *
1047  * Save the osnoise_sample statistics before the sampling phase. These
1048  * values will be used later to compute the diff betwneen the statistics
1049  * before and after the osnoise sampling.
1050  */
1051 static void
1052 save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
1053 {
1054         s->nmi_count = osn_var->nmi.count;
1055         s->irq_count = osn_var->irq.count;
1056         s->softirq_count = osn_var->softirq.count;
1057         s->thread_count = osn_var->thread.count;
1058 }
1059
1060 /*
1061  * diff_osn_sample_stats - Compute the osnoise_sample statistics
1062  *
1063  * After a sample period, compute the difference on the osnoise_sample
1064  * statistics. The struct osnoise_sample *s contains the statistics saved via
1065  * save_osn_sample_stats() before the osnoise sampling.
1066  */
1067 static void
1068 diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
1069 {
1070         s->nmi_count = osn_var->nmi.count - s->nmi_count;
1071         s->irq_count = osn_var->irq.count - s->irq_count;
1072         s->softirq_count = osn_var->softirq.count - s->softirq_count;
1073         s->thread_count = osn_var->thread.count - s->thread_count;
1074 }
1075
1076 /*
1077  * osnoise_stop_tracing - Stop tracing and the tracer.
1078  */
1079 static __always_inline void osnoise_stop_tracing(void)
1080 {
1081         struct trace_array *tr = osnoise_trace;
1082
1083         trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
1084                         "stop tracing hit on cpu %d\n", smp_processor_id());
1085
1086         tracer_tracing_off(tr);
1087 }
1088
1089 /*
1090  * run_osnoise - Sample the time and look for osnoise
1091  *
1092  * Used to capture the time, looking for potential osnoise latency repeatedly.
1093  * Different from hwlat_detector, it is called with preemption and interrupts
1094  * enabled. This allows irqs, softirqs and threads to run, interfering on the
1095  * osnoise sampling thread, as they would do with a regular thread.
1096  */
1097 static int run_osnoise(void)
1098 {
1099         struct osnoise_variables *osn_var = this_cpu_osn_var();
1100         struct trace_array *tr = osnoise_trace;
1101         u64 start, sample, last_sample;
1102         u64 last_int_count, int_count;
1103         s64 noise = 0, max_noise = 0;
1104         s64 total, last_total = 0;
1105         struct osnoise_sample s;
1106         unsigned int threshold;
1107         u64 runtime, stop_in;
1108         u64 sum_noise = 0;
1109         int hw_count = 0;
1110         int ret = -1;
1111
1112         /*
1113          * Considers the current thread as the workload.
1114          */
1115         osn_var->pid = current->pid;
1116
1117         /*
1118          * Save the current stats for the diff
1119          */
1120         save_osn_sample_stats(osn_var, &s);
1121
1122         /*
1123          * if threshold is 0, use the default value of 5 us.
1124          */
1125         threshold = tracing_thresh ? : 5000;
1126
1127         /*
1128          * Make sure NMIs see sampling first
1129          */
1130         osn_var->sampling = true;
1131         barrier();
1132
1133         /*
1134          * Transform the *_us config to nanoseconds to avoid the
1135          * division on the main loop.
1136          */
1137         runtime = osnoise_data.sample_runtime * NSEC_PER_USEC;
1138         stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC;
1139
1140         /*
1141          * Start timestemp
1142          */
1143         start = time_get();
1144
1145         /*
1146          * "previous" loop.
1147          */
1148         last_int_count = set_int_safe_time(osn_var, &last_sample);
1149
1150         do {
1151                 /*
1152                  * Get sample!
1153                  */
1154                 int_count = set_int_safe_time(osn_var, &sample);
1155
1156                 noise = time_sub(sample, last_sample);
1157
1158                 /*
1159                  * This shouldn't happen.
1160                  */
1161                 if (noise < 0) {
1162                         osnoise_taint("negative noise!");
1163                         goto out;
1164                 }
1165
1166                 /*
1167                  * Sample runtime.
1168                  */
1169                 total = time_sub(sample, start);
1170
1171                 /*
1172                  * Check for possible overflows.
1173                  */
1174                 if (total < last_total) {
1175                         osnoise_taint("total overflow!");
1176                         break;
1177                 }
1178
1179                 last_total = total;
1180
1181                 if (noise >= threshold) {
1182                         int interference = int_count - last_int_count;
1183
1184                         if (noise > max_noise)
1185                                 max_noise = noise;
1186
1187                         if (!interference)
1188                                 hw_count++;
1189
1190                         sum_noise += noise;
1191
1192                         trace_sample_threshold(last_sample, noise, interference);
1193
1194                         if (osnoise_data.stop_tracing)
1195                                 if (noise > stop_in)
1196                                         osnoise_stop_tracing();
1197                 }
1198
1199                 /*
1200                  * For the non-preemptive kernel config: let threads runs, if
1201                  * they so wish.
1202                  */
1203                 cond_resched();
1204
1205                 last_sample = sample;
1206                 last_int_count = int_count;
1207
1208         } while (total < runtime && !kthread_should_stop());
1209
1210         /*
1211          * Finish the above in the view for interrupts.
1212          */
1213         barrier();
1214
1215         osn_var->sampling = false;
1216
1217         /*
1218          * Make sure sampling data is no longer updated.
1219          */
1220         barrier();
1221
1222         /*
1223          * Save noise info.
1224          */
1225         s.noise = time_to_us(sum_noise);
1226         s.runtime = time_to_us(total);
1227         s.max_sample = time_to_us(max_noise);
1228         s.hw_count = hw_count;
1229
1230         /* Save interference stats info */
1231         diff_osn_sample_stats(osn_var, &s);
1232
1233         trace_osnoise_sample(&s);
1234
1235         /* Keep a running maximum ever recorded osnoise "latency" */
1236         if (max_noise > tr->max_latency) {
1237                 tr->max_latency = max_noise;
1238                 latency_fsnotify(tr);
1239         }
1240
1241         if (osnoise_data.stop_tracing_total)
1242                 if (s.noise > osnoise_data.stop_tracing_total)
1243                         osnoise_stop_tracing();
1244
1245         return 0;
1246 out:
1247         return ret;
1248 }
1249
1250 static struct cpumask osnoise_cpumask;
1251 static struct cpumask save_cpumask;
1252
1253 /*
1254  * osnoise_main - The osnoise detection kernel thread
1255  *
1256  * Calls run_osnoise() function to measure the osnoise for the configured runtime,
1257  * every period.
1258  */
1259 static int osnoise_main(void *data)
1260 {
1261         u64 interval;
1262
1263         while (!kthread_should_stop()) {
1264
1265                 run_osnoise();
1266
1267                 mutex_lock(&interface_lock);
1268                 interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
1269                 mutex_unlock(&interface_lock);
1270
1271                 do_div(interval, USEC_PER_MSEC);
1272
1273                 /*
1274                  * differently from hwlat_detector, the osnoise tracer can run
1275                  * without a pause because preemption is on.
1276                  */
1277                 if (interval < 1) {
1278                         /* Let synchronize_rcu_tasks() make progress */
1279                         cond_resched_tasks_rcu_qs();
1280                         continue;
1281                 }
1282
1283                 if (msleep_interruptible(interval))
1284                         break;
1285         }
1286
1287         return 0;
1288 }
1289
1290 #ifdef CONFIG_TIMERLAT_TRACER
1291 /*
1292  * timerlat_irq - hrtimer handler for timerlat.
1293  */
1294 static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
1295 {
1296         struct osnoise_variables *osn_var = this_cpu_osn_var();
1297         struct trace_array *tr = osnoise_trace;
1298         struct timerlat_variables *tlat;
1299         struct timerlat_sample s;
1300         u64 now;
1301         u64 diff;
1302
1303         /*
1304          * I am not sure if the timer was armed for this CPU. So, get
1305          * the timerlat struct from the timer itself, not from this
1306          * CPU.
1307          */
1308         tlat = container_of(timer, struct timerlat_variables, timer);
1309
1310         now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
1311
1312         /*
1313          * Enable the osnoise: events for thread an softirq.
1314          */
1315         tlat->tracing_thread = true;
1316
1317         osn_var->thread.arrival_time = time_get();
1318
1319         /*
1320          * A hardirq is running: the timer IRQ. It is for sure preempting
1321          * a thread, and potentially preempting a softirq.
1322          *
1323          * At this point, it is not interesting to know the duration of the
1324          * preempted thread (and maybe softirq), but how much time they will
1325          * delay the beginning of the execution of the timer thread.
1326          *
1327          * To get the correct (net) delay added by the softirq, its delta_start
1328          * is set as the IRQ one. In this way, at the return of the IRQ, the delta
1329          * start of the sofitrq will be zeroed, accounting then only the time
1330          * after that.
1331          *
1332          * The thread follows the same principle. However, if a softirq is
1333          * running, the thread needs to receive the softirq delta_start. The
1334          * reason being is that the softirq will be the last to be unfolded,
1335          * resseting the thread delay to zero.
1336          */
1337 #ifndef CONFIG_PREEMPT_RT
1338         if (osn_var->softirq.delta_start) {
1339                 copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
1340                                    &osn_var->softirq.delta_start);
1341
1342                 copy_int_safe_time(osn_var, &osn_var->softirq.delta_start,
1343                                     &osn_var->irq.delta_start);
1344         } else {
1345                 copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
1346                                     &osn_var->irq.delta_start);
1347         }
1348 #else /* CONFIG_PREEMPT_RT */
1349         /*
1350          * The sofirqs run as threads on RT, so there is not need
1351          * to keep track of it.
1352          */
1353         copy_int_safe_time(osn_var, &osn_var->thread.delta_start, &osn_var->irq.delta_start);
1354 #endif /* CONFIG_PREEMPT_RT */
1355
1356         /*
1357          * Compute the current time with the expected time.
1358          */
1359         diff = now - tlat->abs_period;
1360
1361         tlat->count++;
1362         s.seqnum = tlat->count;
1363         s.timer_latency = diff;
1364         s.context = IRQ_CONTEXT;
1365
1366         trace_timerlat_sample(&s);
1367
1368         /* Keep a running maximum ever recorded os noise "latency" */
1369         if (diff > tr->max_latency) {
1370                 tr->max_latency = diff;
1371                 latency_fsnotify(tr);
1372         }
1373
1374         if (osnoise_data.stop_tracing)
1375                 if (time_to_us(diff) >= osnoise_data.stop_tracing)
1376                         osnoise_stop_tracing();
1377
1378         wake_up_process(tlat->kthread);
1379
1380         if (osnoise_data.print_stack)
1381                 timerlat_save_stack(0);
1382
1383         return HRTIMER_NORESTART;
1384 }
1385
1386 /*
1387  * wait_next_period - Wait for the next period for timerlat
1388  */
1389 static int wait_next_period(struct timerlat_variables *tlat)
1390 {
1391         ktime_t next_abs_period, now;
1392         u64 rel_period = osnoise_data.timerlat_period * 1000;
1393
1394         now = hrtimer_cb_get_time(&tlat->timer);
1395         next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
1396
1397         /*
1398          * Save the next abs_period.
1399          */
1400         tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
1401
1402         /*
1403          * If the new abs_period is in the past, skip the activation.
1404          */
1405         while (ktime_compare(now, next_abs_period) > 0) {
1406                 next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
1407                 tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
1408         }
1409
1410         set_current_state(TASK_INTERRUPTIBLE);
1411
1412         hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD);
1413         schedule();
1414         return 1;
1415 }
1416
1417 /*
1418  * timerlat_main- Timerlat main
1419  */
1420 static int timerlat_main(void *data)
1421 {
1422         struct osnoise_variables *osn_var = this_cpu_osn_var();
1423         struct timerlat_variables *tlat = this_cpu_tmr_var();
1424         struct timerlat_sample s;
1425         struct sched_param sp;
1426         u64 now, diff;
1427
1428         /*
1429          * Make the thread RT, that is how cyclictest is usually used.
1430          */
1431         sp.sched_priority = DEFAULT_TIMERLAT_PRIO;
1432         sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1433
1434         tlat->count = 0;
1435         tlat->tracing_thread = false;
1436
1437         hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
1438         tlat->timer.function = timerlat_irq;
1439         tlat->kthread = current;
1440         osn_var->pid = current->pid;
1441         /*
1442          * Anotate the arrival time.
1443          */
1444         tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
1445
1446         wait_next_period(tlat);
1447
1448         osn_var->sampling = 1;
1449
1450         while (!kthread_should_stop()) {
1451                 now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
1452                 diff = now - tlat->abs_period;
1453
1454                 s.seqnum = tlat->count;
1455                 s.timer_latency = diff;
1456                 s.context = THREAD_CONTEXT;
1457
1458                 trace_timerlat_sample(&s);
1459
1460 #ifdef CONFIG_STACKTRACE
1461                 if (osnoise_data.print_stack)
1462                         if (osnoise_data.print_stack <= time_to_us(diff))
1463                                 timerlat_dump_stack();
1464 #endif /* CONFIG_STACKTRACE */
1465
1466                 tlat->tracing_thread = false;
1467                 if (osnoise_data.stop_tracing_total)
1468                         if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
1469                                 osnoise_stop_tracing();
1470
1471                 wait_next_period(tlat);
1472         }
1473
1474         hrtimer_cancel(&tlat->timer);
1475         return 0;
1476 }
1477 #endif /* CONFIG_TIMERLAT_TRACER */
1478
1479 /*
1480  * stop_kthread - stop a workload thread
1481  */
1482 static void stop_kthread(unsigned int cpu)
1483 {
1484         struct task_struct *kthread;
1485
1486         kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
1487         if (kthread)
1488                 kthread_stop(kthread);
1489         per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
1490 }
1491
1492 /*
1493  * stop_per_cpu_kthread - Stop per-cpu threads
1494  *
1495  * Stop the osnoise sampling htread. Use this on unload and at system
1496  * shutdown.
1497  */
1498 static void stop_per_cpu_kthreads(void)
1499 {
1500         int cpu;
1501
1502         cpus_read_lock();
1503
1504         for_each_online_cpu(cpu)
1505                 stop_kthread(cpu);
1506
1507         cpus_read_unlock();
1508 }
1509
1510 /*
1511  * start_kthread - Start a workload tread
1512  */
1513 static int start_kthread(unsigned int cpu)
1514 {
1515         struct task_struct *kthread;
1516         void *main = osnoise_main;
1517         char comm[24];
1518
1519 #ifdef CONFIG_TIMERLAT_TRACER
1520         if (osnoise_data.timerlat_tracer) {
1521                 snprintf(comm, 24, "timerlat/%d", cpu);
1522                 main = timerlat_main;
1523         } else {
1524                 snprintf(comm, 24, "osnoise/%d", cpu);
1525         }
1526 #else
1527         snprintf(comm, 24, "osnoise/%d", cpu);
1528 #endif
1529         kthread = kthread_create_on_cpu(main, NULL, cpu, comm);
1530
1531         if (IS_ERR(kthread)) {
1532                 pr_err(BANNER "could not start sampling thread\n");
1533                 stop_per_cpu_kthreads();
1534                 return -ENOMEM;
1535         }
1536
1537         per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
1538         wake_up_process(kthread);
1539
1540         return 0;
1541 }
1542
1543 /*
1544  * start_per_cpu_kthread - Kick off per-cpu osnoise sampling kthreads
1545  *
1546  * This starts the kernel thread that will look for osnoise on many
1547  * cpus.
1548  */
1549 static int start_per_cpu_kthreads(struct trace_array *tr)
1550 {
1551         struct cpumask *current_mask = &save_cpumask;
1552         int retval = 0;
1553         int cpu;
1554
1555         cpus_read_lock();
1556         /*
1557          * Run only on CPUs in which trace and osnoise are allowed to run.
1558          */
1559         cpumask_and(current_mask, tr->tracing_cpumask, &osnoise_cpumask);
1560         /*
1561          * And the CPU is online.
1562          */
1563         cpumask_and(current_mask, cpu_online_mask, current_mask);
1564
1565         for_each_possible_cpu(cpu)
1566                 per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
1567
1568         for_each_cpu(cpu, current_mask) {
1569                 retval = start_kthread(cpu);
1570                 if (retval) {
1571                         stop_per_cpu_kthreads();
1572                         break;
1573                 }
1574         }
1575
1576         cpus_read_unlock();
1577
1578         return retval;
1579 }
1580
1581 #ifdef CONFIG_HOTPLUG_CPU
1582 static void osnoise_hotplug_workfn(struct work_struct *dummy)
1583 {
1584         struct trace_array *tr = osnoise_trace;
1585         unsigned int cpu = smp_processor_id();
1586
1587
1588         mutex_lock(&trace_types_lock);
1589
1590         if (!osnoise_busy)
1591                 goto out_unlock_trace;
1592
1593         mutex_lock(&interface_lock);
1594         cpus_read_lock();
1595
1596         if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
1597                 goto out_unlock;
1598
1599         if (!cpumask_test_cpu(cpu, tr->tracing_cpumask))
1600                 goto out_unlock;
1601
1602         start_kthread(cpu);
1603
1604 out_unlock:
1605         cpus_read_unlock();
1606         mutex_unlock(&interface_lock);
1607 out_unlock_trace:
1608         mutex_unlock(&trace_types_lock);
1609 }
1610
1611 static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn);
1612
1613 /*
1614  * osnoise_cpu_init - CPU hotplug online callback function
1615  */
1616 static int osnoise_cpu_init(unsigned int cpu)
1617 {
1618         schedule_work_on(cpu, &osnoise_hotplug_work);
1619         return 0;
1620 }
1621
1622 /*
1623  * osnoise_cpu_die - CPU hotplug offline callback function
1624  */
1625 static int osnoise_cpu_die(unsigned int cpu)
1626 {
1627         stop_kthread(cpu);
1628         return 0;
1629 }
1630
1631 static void osnoise_init_hotplug_support(void)
1632 {
1633         int ret;
1634
1635         ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online",
1636                                 osnoise_cpu_init, osnoise_cpu_die);
1637         if (ret < 0)
1638                 pr_warn(BANNER "Error to init cpu hotplug support\n");
1639
1640         return;
1641 }
1642 #else /* CONFIG_HOTPLUG_CPU */
1643 static void osnoise_init_hotplug_support(void)
1644 {
1645         return;
1646 }
1647 #endif /* CONFIG_HOTPLUG_CPU */
1648
1649 /*
1650  * osnoise_cpus_read - Read function for reading the "cpus" file
1651  * @filp: The active open file structure
1652  * @ubuf: The userspace provided buffer to read value into
1653  * @cnt: The maximum number of bytes to read
1654  * @ppos: The current "file" position
1655  *
1656  * Prints the "cpus" output into the user-provided buffer.
1657  */
1658 static ssize_t
1659 osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count,
1660                   loff_t *ppos)
1661 {
1662         char *mask_str;
1663         int len;
1664
1665         mutex_lock(&interface_lock);
1666
1667         len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1;
1668         mask_str = kmalloc(len, GFP_KERNEL);
1669         if (!mask_str) {
1670                 count = -ENOMEM;
1671                 goto out_unlock;
1672         }
1673
1674         len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask));
1675         if (len >= count) {
1676                 count = -EINVAL;
1677                 goto out_free;
1678         }
1679
1680         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
1681
1682 out_free:
1683         kfree(mask_str);
1684 out_unlock:
1685         mutex_unlock(&interface_lock);
1686
1687         return count;
1688 }
1689
1690 static void osnoise_tracer_start(struct trace_array *tr);
1691 static void osnoise_tracer_stop(struct trace_array *tr);
1692
1693 /*
1694  * osnoise_cpus_write - Write function for "cpus" entry
1695  * @filp: The active open file structure
1696  * @ubuf: The user buffer that contains the value to write
1697  * @cnt: The maximum number of bytes to write to "file"
1698  * @ppos: The current position in @file
1699  *
1700  * This function provides a write implementation for the "cpus"
1701  * interface to the osnoise trace. By default, it lists all  CPUs,
1702  * in this way, allowing osnoise threads to run on any online CPU
1703  * of the system. It serves to restrict the execution of osnoise to the
1704  * set of CPUs writing via this interface. Note that osnoise also
1705  * respects the "tracing_cpumask." Hence, osnoise threads will run only
1706  * on the set of CPUs allowed here AND on "tracing_cpumask." Why not
1707  * have just "tracing_cpumask?" Because the user might be interested
1708  * in tracing what is running on other CPUs. For instance, one might
1709  * run osnoise in one HT CPU while observing what is running on the
1710  * sibling HT CPU.
1711  */
1712 static ssize_t
1713 osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
1714                    loff_t *ppos)
1715 {
1716         struct trace_array *tr = osnoise_trace;
1717         cpumask_var_t osnoise_cpumask_new;
1718         int running, err;
1719         char buf[256];
1720
1721         if (count >= 256)
1722                 return -EINVAL;
1723
1724         if (copy_from_user(buf, ubuf, count))
1725                 return -EFAULT;
1726
1727         if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL))
1728                 return -ENOMEM;
1729
1730         err = cpulist_parse(buf, osnoise_cpumask_new);
1731         if (err)
1732                 goto err_free;
1733
1734         /*
1735          * trace_types_lock is taken to avoid concurrency on start/stop
1736          * and osnoise_busy.
1737          */
1738         mutex_lock(&trace_types_lock);
1739         running = osnoise_busy;
1740         if (running)
1741                 osnoise_tracer_stop(tr);
1742
1743         mutex_lock(&interface_lock);
1744         /*
1745          * osnoise_cpumask is read by CPU hotplug operations.
1746          */
1747         cpus_read_lock();
1748
1749         cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);
1750
1751         cpus_read_unlock();
1752         mutex_unlock(&interface_lock);
1753
1754         if (running)
1755                 osnoise_tracer_start(tr);
1756         mutex_unlock(&trace_types_lock);
1757
1758         free_cpumask_var(osnoise_cpumask_new);
1759         return count;
1760
1761 err_free:
1762         free_cpumask_var(osnoise_cpumask_new);
1763
1764         return err;
1765 }
1766
1767 /*
1768  * osnoise/runtime_us: cannot be greater than the period.
1769  */
1770 static struct trace_min_max_param osnoise_runtime = {
1771         .lock   = &interface_lock,
1772         .val    = &osnoise_data.sample_runtime,
1773         .max    = &osnoise_data.sample_period,
1774         .min    = NULL,
1775 };
1776
1777 /*
1778  * osnoise/period_us: cannot be smaller than the runtime.
1779  */
1780 static struct trace_min_max_param osnoise_period = {
1781         .lock   = &interface_lock,
1782         .val    = &osnoise_data.sample_period,
1783         .max    = NULL,
1784         .min    = &osnoise_data.sample_runtime,
1785 };
1786
1787 /*
1788  * osnoise/stop_tracing_us: no limit.
1789  */
1790 static struct trace_min_max_param osnoise_stop_tracing_in = {
1791         .lock   = &interface_lock,
1792         .val    = &osnoise_data.stop_tracing,
1793         .max    = NULL,
1794         .min    = NULL,
1795 };
1796
1797 /*
1798  * osnoise/stop_tracing_total_us: no limit.
1799  */
1800 static struct trace_min_max_param osnoise_stop_tracing_total = {
1801         .lock   = &interface_lock,
1802         .val    = &osnoise_data.stop_tracing_total,
1803         .max    = NULL,
1804         .min    = NULL,
1805 };
1806
1807 #ifdef CONFIG_TIMERLAT_TRACER
1808 /*
1809  * osnoise/print_stack: print the stacktrace of the IRQ handler if the total
1810  * latency is higher than val.
1811  */
1812 static struct trace_min_max_param osnoise_print_stack = {
1813         .lock   = &interface_lock,
1814         .val    = &osnoise_data.print_stack,
1815         .max    = NULL,
1816         .min    = NULL,
1817 };
1818
1819 /*
1820  * osnoise/timerlat_period: min 100 us, max 1 s
1821  */
1822 u64 timerlat_min_period = 100;
1823 u64 timerlat_max_period = 1000000;
1824 static struct trace_min_max_param timerlat_period = {
1825         .lock   = &interface_lock,
1826         .val    = &osnoise_data.timerlat_period,
1827         .max    = &timerlat_max_period,
1828         .min    = &timerlat_min_period,
1829 };
1830 #endif
1831
1832 static const struct file_operations cpus_fops = {
1833         .open           = tracing_open_generic,
1834         .read           = osnoise_cpus_read,
1835         .write          = osnoise_cpus_write,
1836         .llseek         = generic_file_llseek,
1837 };
1838
1839 /*
1840  * init_tracefs - A function to initialize the tracefs interface files
1841  *
1842  * This function creates entries in tracefs for "osnoise" and "timerlat".
1843  * It creates these directories in the tracing directory, and within that
1844  * directory the use can change and view the configs.
1845  */
1846 static int init_tracefs(void)
1847 {
1848         struct dentry *top_dir;
1849         struct dentry *tmp;
1850         int ret;
1851
1852         ret = tracing_init_dentry();
1853         if (ret)
1854                 return -ENOMEM;
1855
1856         top_dir = tracefs_create_dir("osnoise", NULL);
1857         if (!top_dir)
1858                 return 0;
1859
1860         tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir,
1861                                   &osnoise_period, &trace_min_max_fops);
1862         if (!tmp)
1863                 goto err;
1864
1865         tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir,
1866                                   &osnoise_runtime, &trace_min_max_fops);
1867         if (!tmp)
1868                 goto err;
1869
1870         tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir,
1871                                   &osnoise_stop_tracing_in, &trace_min_max_fops);
1872         if (!tmp)
1873                 goto err;
1874
1875         tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir,
1876                                   &osnoise_stop_tracing_total, &trace_min_max_fops);
1877         if (!tmp)
1878                 goto err;
1879
1880         tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops);
1881         if (!tmp)
1882                 goto err;
1883 #ifdef CONFIG_TIMERLAT_TRACER
1884 #ifdef CONFIG_STACKTRACE
1885         tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir,
1886                                   &osnoise_print_stack, &trace_min_max_fops);
1887         if (!tmp)
1888                 goto err;
1889 #endif
1890
1891         tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir,
1892                                   &timerlat_period, &trace_min_max_fops);
1893         if (!tmp)
1894                 goto err;
1895 #endif
1896
1897         return 0;
1898
1899 err:
1900         tracefs_remove(top_dir);
1901         return -ENOMEM;
1902 }
1903
1904 static int osnoise_hook_events(void)
1905 {
1906         int retval;
1907
1908         /*
1909          * Trace is already hooked, we are re-enabling from
1910          * a stop_tracing_*.
1911          */
1912         if (trace_osnoise_callback_enabled)
1913                 return 0;
1914
1915         retval = hook_irq_events();
1916         if (retval)
1917                 return -EINVAL;
1918
1919         retval = hook_softirq_events();
1920         if (retval)
1921                 goto out_unhook_irq;
1922
1923         retval = hook_thread_events();
1924         /*
1925          * All fine!
1926          */
1927         if (!retval)
1928                 return 0;
1929
1930         unhook_softirq_events();
1931 out_unhook_irq:
1932         unhook_irq_events();
1933         return -EINVAL;
1934 }
1935
1936 static int __osnoise_tracer_start(struct trace_array *tr)
1937 {
1938         int retval;
1939
1940         osn_var_reset_all();
1941
1942         retval = osnoise_hook_events();
1943         if (retval)
1944                 return retval;
1945         /*
1946          * Make sure NMIs see reseted values.
1947          */
1948         barrier();
1949         trace_osnoise_callback_enabled = true;
1950
1951         retval = start_per_cpu_kthreads(tr);
1952         if (retval) {
1953                 unhook_irq_events();
1954                 return retval;
1955         }
1956
1957         osnoise_busy = true;
1958
1959         return 0;
1960 }
1961
1962 static void osnoise_tracer_start(struct trace_array *tr)
1963 {
1964         int retval;
1965
1966         if (osnoise_busy)
1967                 return;
1968
1969         retval = __osnoise_tracer_start(tr);
1970         if (retval)
1971                 pr_err(BANNER "Error starting osnoise tracer\n");
1972
1973 }
1974
1975 static void osnoise_tracer_stop(struct trace_array *tr)
1976 {
1977         if (!osnoise_busy)
1978                 return;
1979
1980         trace_osnoise_callback_enabled = false;
1981         barrier();
1982
1983         stop_per_cpu_kthreads();
1984
1985         unhook_irq_events();
1986         unhook_softirq_events();
1987         unhook_thread_events();
1988
1989         osnoise_busy = false;
1990 }
1991
1992 static int osnoise_tracer_init(struct trace_array *tr)
1993 {
1994
1995         /* Only allow one instance to enable this */
1996         if (osnoise_busy)
1997                 return -EBUSY;
1998
1999         osnoise_trace = tr;
2000         tr->max_latency = 0;
2001
2002         osnoise_tracer_start(tr);
2003
2004         return 0;
2005 }
2006
2007 static void osnoise_tracer_reset(struct trace_array *tr)
2008 {
2009         osnoise_tracer_stop(tr);
2010 }
2011
2012 static struct tracer osnoise_tracer __read_mostly = {
2013         .name           = "osnoise",
2014         .init           = osnoise_tracer_init,
2015         .reset          = osnoise_tracer_reset,
2016         .start          = osnoise_tracer_start,
2017         .stop           = osnoise_tracer_stop,
2018         .print_header   = print_osnoise_headers,
2019         .allow_instances = true,
2020 };
2021
2022 #ifdef CONFIG_TIMERLAT_TRACER
2023 static void timerlat_tracer_start(struct trace_array *tr)
2024 {
2025         int retval;
2026
2027         if (osnoise_busy)
2028                 return;
2029
2030         osnoise_data.timerlat_tracer = 1;
2031
2032         retval = __osnoise_tracer_start(tr);
2033         if (retval)
2034                 goto out_err;
2035
2036         return;
2037 out_err:
2038         pr_err(BANNER "Error starting timerlat tracer\n");
2039 }
2040
2041 static void timerlat_tracer_stop(struct trace_array *tr)
2042 {
2043         int cpu;
2044
2045         if (!osnoise_busy)
2046                 return;
2047
2048         for_each_online_cpu(cpu)
2049                 per_cpu(per_cpu_osnoise_var, cpu).sampling = 0;
2050
2051         osnoise_tracer_stop(tr);
2052
2053         osnoise_data.timerlat_tracer = 0;
2054 }
2055
2056 static int timerlat_tracer_init(struct trace_array *tr)
2057 {
2058         /* Only allow one instance to enable this */
2059         if (osnoise_busy)
2060                 return -EBUSY;
2061
2062         osnoise_trace = tr;
2063
2064         tr->max_latency = 0;
2065
2066         timerlat_tracer_start(tr);
2067
2068         return 0;
2069 }
2070
2071 static void timerlat_tracer_reset(struct trace_array *tr)
2072 {
2073         timerlat_tracer_stop(tr);
2074 }
2075
2076 static struct tracer timerlat_tracer __read_mostly = {
2077         .name           = "timerlat",
2078         .init           = timerlat_tracer_init,
2079         .reset          = timerlat_tracer_reset,
2080         .start          = timerlat_tracer_start,
2081         .stop           = timerlat_tracer_stop,
2082         .print_header   = print_timerlat_headers,
2083         .allow_instances = true,
2084 };
2085 #endif /* CONFIG_TIMERLAT_TRACER */
2086
2087 __init static int init_osnoise_tracer(void)
2088 {
2089         int ret;
2090
2091         mutex_init(&interface_lock);
2092
2093         cpumask_copy(&osnoise_cpumask, cpu_all_mask);
2094
2095         ret = register_tracer(&osnoise_tracer);
2096         if (ret) {
2097                 pr_err(BANNER "Error registering osnoise!\n");
2098                 return ret;
2099         }
2100
2101 #ifdef CONFIG_TIMERLAT_TRACER
2102         ret = register_tracer(&timerlat_tracer);
2103         if (ret) {
2104                 pr_err(BANNER "Error registering timerlat\n");
2105                 return ret;
2106         }
2107 #endif
2108         osnoise_init_hotplug_support();
2109
2110         init_tracefs();
2111
2112         return 0;
2113 }
2114 late_initcall(init_osnoise_tracer);