sched: Provide cant_migrate()
[linux-2.6-microblaze.git] / include / trace / events / sched.h
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #undef TRACE_SYSTEM
3 #define TRACE_SYSTEM sched
4
5 #if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
6 #define _TRACE_SCHED_H
7
8 #include <linux/sched/numa_balancing.h>
9 #include <linux/tracepoint.h>
10 #include <linux/binfmts.h>
11
12 /*
13  * Tracepoint for calling kthread_stop, performed to end a kthread:
    *
    * Takes the task_struct pointer t and records that task's command name
    * and pid; the record is printed as "comm=%s pid=%d".
14  */
15 TRACE_EVENT(sched_kthread_stop,
16
17         TP_PROTO(struct task_struct *t),
18
19         TP_ARGS(t),
20
21         TP_STRUCT__entry(
22                 __array(        char,   comm,   TASK_COMM_LEN   )
23                 __field(        pid_t,  pid                     )
24         ),
25
26         TP_fast_assign(
                   /* Copies all TASK_COMM_LEN bytes of t->comm, not only up to a NUL. */
27                 memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
28                 __entry->pid    = t->pid;
29         ),
30
31         TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
32 );
33
34 /*
35  * Tracepoint for the return value of the kthread stopping:
    *
    * Records the single integer argument ret and prints it as "ret=%d".
36  */
37 TRACE_EVENT(sched_kthread_stop_ret,
38
39         TP_PROTO(int ret),
40
41         TP_ARGS(ret),
42
43         TP_STRUCT__entry(
44                 __field(        int,    ret     )
45         ),
46
47         TP_fast_assign(
48                 __entry->ret    = ret;
49         ),
50
51         TP_printk("ret=%d", __entry->ret)
52 );
53
54 /*
55  * Tracepoint for waking up a task:
    *
    * Event class reused by the sched_waking, sched_wakeup and sched_wakeup_new
    * events in this file.  Each record holds the task's comm, pid and prio, a
    * "success" field, and the CPU reported by task_cpu(p).
56  */
57 DECLARE_EVENT_CLASS(sched_wakeup_template,
58
59         TP_PROTO(struct task_struct *p),
60
           /*
            * NOTE(review): __perf_task() is not defined in this file; presumably
            * it marks p as the task perf should attribute the event to -- confirm.
            */
61         TP_ARGS(__perf_task(p)),
62
63         TP_STRUCT__entry(
64                 __array(        char,   comm,   TASK_COMM_LEN   )
65                 __field(        pid_t,  pid                     )
66                 __field(        int,    prio                    )
67                 __field(        int,    success                 )
68                 __field(        int,    target_cpu              )
69         ),
70
71         TP_fast_assign(
72                 memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
73                 __entry->pid            = p->pid;
74                 __entry->prio           = p->prio; /* XXX SCHED_DEADLINE */
75                 __entry->success        = 1; /* rudiment, kill when possible */
76                 __entry->target_cpu     = task_cpu(p);
77         ),
78
           /* "success" is stored in the record but is not part of the printed line. */
79         TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d",
80                   __entry->comm, __entry->pid, __entry->prio,
81                   __entry->target_cpu)
82 );
83
84 /*
85  * Tracepoint called when waking a task; this tracepoint is guaranteed to be
86  * called from the waking context.
    *
    * Instance of the sched_wakeup_template class: same argument (the task p),
    * same record layout and same printed format.
87  */
88 DEFINE_EVENT(sched_wakeup_template, sched_waking,
89              TP_PROTO(struct task_struct *p),
90              TP_ARGS(p));
91
92 /*
93  * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
94  * It is not always called from the waking context.
    *
    * Instance of the sched_wakeup_template class: same argument (the task p),
    * same record layout and same printed format.
95  */
96 DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
97              TP_PROTO(struct task_struct *p),
98              TP_ARGS(p));
99
100 /*
101  * Tracepoint for waking up a new task:
     *
     * Instance of the sched_wakeup_template class: same argument (the task p),
     * same record layout and same printed format.
102  */
103 DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
104              TP_PROTO(struct task_struct *p),
105              TP_ARGS(p));
106
107 #ifdef CREATE_TRACE_POINTS
    /*
     * Helper for the sched_switch event: computes the value stored in its
     * prev_state field.  Only compiled when CREATE_TRACE_POINTS is defined.
     *
     * @preempt: when true, the task state is not consulted at all and
     *           TASK_REPORT_MAX is returned.
     * @p:       task whose state is reported; with CONFIG_SCHED_DEBUG set,
     *           anything other than current triggers BUG_ON().
     *
     * Returns TASK_REPORT_MAX when preempt is set, 0 when task_state_index(p)
     * is 0, and otherwise the single bit 1 << (task_state_index(p) - 1).
     */
108 static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
109 {
110         unsigned int state;
111
112 #ifdef CONFIG_SCHED_DEBUG
113         BUG_ON(p != current);
114 #endif /* CONFIG_SCHED_DEBUG */
115
116         /*
117          * Preemption ignores task state, therefore preempted tasks are always
118          * RUNNING (we will not have dequeued if state != RUNNING).
119          */
120         if (preempt)
121                 return TASK_REPORT_MAX;
122
123         /*
124          * task_state_index() uses fls() and returns a value from 0-8 range.
125          * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
126          * it for left shift operation to get the correct task->state
127          * mapping.
128          */
129         state = task_state_index(p);
130
            /* Index 0 is returned unchanged as 0; any other index becomes a one-bit mask. */
131         return state ? (1 << (state - 1)) : state;
132 }
133 #endif /* CREATE_TRACE_POINTS */
134
135 /*
136  * Tracepoint for task switches, performed by the scheduler:
     *
     * Records comm, pid and prio of both the prev and the next task, plus
     * prev_state as computed by __trace_sched_switch_state(preempt, prev).
137  */
138 TRACE_EVENT(sched_switch,
139
140         TP_PROTO(bool preempt,
141                  struct task_struct *prev,
142                  struct task_struct *next),
143
144         TP_ARGS(preempt, prev, next),
145
146         TP_STRUCT__entry(
147                 __array(        char,   prev_comm,      TASK_COMM_LEN   )
148                 __field(        pid_t,  prev_pid                        )
149                 __field(        int,    prev_prio                       )
150                 __field(        long,   prev_state                      )
151                 __array(        char,   next_comm,      TASK_COMM_LEN   )
152                 __field(        pid_t,  next_pid                        )
153                 __field(        int,    next_prio                       )
154         ),
155
156         TP_fast_assign(
157                 memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
158                 __entry->prev_pid       = prev->pid;
159                 __entry->prev_prio      = prev->prio;
160                 __entry->prev_state     = __trace_sched_switch_state(preempt, prev);
161                 memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
162                 __entry->next_pid       = next->pid;
163                 __entry->next_prio      = next->prio;
164                 /* XXX SCHED_DEADLINE */
165         ),
166
167         TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
168                 __entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
169
                    /*
                     * First %s of prev_state: the bits below TASK_REPORT_MAX, decoded
                     * to letters joined by "|"; "R" when none of those bits is set.
                     * NOTE(review): TASK_DEAD is mapped to "I" here -- presumably its
                     * bit value doubles as an idle report bit; confirm in linux/sched.h.
                     */
170                 (__entry->prev_state & (TASK_REPORT_MAX - 1)) ?
171                   __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|",
172                                 { TASK_INTERRUPTIBLE, "S" },
173                                 { TASK_UNINTERRUPTIBLE, "D" },
174                                 { __TASK_STOPPED, "T" },
175                                 { __TASK_TRACED, "t" },
176                                 { EXIT_DEAD, "X" },
177                                 { EXIT_ZOMBIE, "Z" },
178                                 { TASK_PARKED, "P" },
179                                 { TASK_DEAD, "I" }) :
180                   "R",
181
                    /* Second %s: "+" when the TASK_REPORT_MAX bit is set (the value the helper returns for preempt). */
182                 __entry->prev_state & TASK_REPORT_MAX ? "+" : "",
183                 __entry->next_comm, __entry->next_pid, __entry->next_prio)
184 );
185
186 /*
187  * Tracepoint for a task being migrated:
     *
     * Records the task's comm, pid and prio together with two CPU numbers:
     * orig_cpu is read from task_cpu(p) when the event fires, dest_cpu is the
     * value passed in by the caller.
188  */
189 TRACE_EVENT(sched_migrate_task,
190
191         TP_PROTO(struct task_struct *p, int dest_cpu),
192
193         TP_ARGS(p, dest_cpu),
194
195         TP_STRUCT__entry(
196                 __array(        char,   comm,   TASK_COMM_LEN   )
197                 __field(        pid_t,  pid                     )
198                 __field(        int,    prio                    )
199                 __field(        int,    orig_cpu                )
200                 __field(        int,    dest_cpu                )
201         ),
202
203         TP_fast_assign(
204                 memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
205                 __entry->pid            = p->pid;
206                 __entry->prio           = p->prio; /* XXX SCHED_DEADLINE */
207                 __entry->orig_cpu       = task_cpu(p);
208                 __entry->dest_cpu       = dest_cpu;
209         ),
210
211         TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
212                   __entry->comm, __entry->pid, __entry->prio,
213                   __entry->orig_cpu, __entry->dest_cpu)
214 );
215
    /*
     * Event class reused by the sched_process_free, sched_process_exit and
     * sched_wait_task events in this file.  Records comm, pid and prio of the
     * task p and prints them as "comm=%s pid=%d prio=%d".
     */
216 DECLARE_EVENT_CLASS(sched_process_template,
217
218         TP_PROTO(struct task_struct *p),
219
220         TP_ARGS(p),
221
222         TP_STRUCT__entry(
223                 __array(        char,   comm,   TASK_COMM_LEN   )
224                 __field(        pid_t,  pid                     )
225                 __field(        int,    prio                    )
226         ),
227
228         TP_fast_assign(
229                 memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
230                 __entry->pid            = p->pid;
231                 __entry->prio           = p->prio; /* XXX SCHED_DEADLINE */
232         ),
233
234         TP_printk("comm=%s pid=%d prio=%d",
235                   __entry->comm, __entry->pid, __entry->prio)
236 );
237
238 /*
239  * Tracepoint for freeing a task:
     *
     * Instance of the sched_process_template class (comm, pid, prio of p).
240  */
241 DEFINE_EVENT(sched_process_template, sched_process_free,
242              TP_PROTO(struct task_struct *p),
243              TP_ARGS(p));
244
245 /*
246  * Tracepoint for a task exiting:
     *
     * Instance of the sched_process_template class (comm, pid, prio of p).
247  */
248 DEFINE_EVENT(sched_process_template, sched_process_exit,
249              TP_PROTO(struct task_struct *p),
250              TP_ARGS(p));
251
252 /*
253  * Tracepoint for waiting on task to unschedule:
     *
     * Instance of the sched_process_template class (comm, pid, prio of p).
254  */
255 DEFINE_EVENT(sched_process_template, sched_wait_task,
256         TP_PROTO(struct task_struct *p),
257         TP_ARGS(p));
258
259 /*
260  * Tracepoint for a waiting task:
     *
     * The record mixes two sources: comm and prio are read from current,
     * while the pid field is the number obtained from the struct pid argument
     * via pid_nr(pid).
261  */
262 TRACE_EVENT(sched_process_wait,
263
264         TP_PROTO(struct pid *pid),
265
266         TP_ARGS(pid),
267
268         TP_STRUCT__entry(
269                 __array(        char,   comm,   TASK_COMM_LEN   )
270                 __field(        pid_t,  pid                     )
271                 __field(        int,    prio                    )
272         ),
273
274         TP_fast_assign(
275                 memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
                    /* pid comes from the argument, not from current. */
276                 __entry->pid            = pid_nr(pid);
277                 __entry->prio           = current->prio; /* XXX SCHED_DEADLINE */
278         ),
279
280         TP_printk("comm=%s pid=%d prio=%d",
281                   __entry->comm, __entry->pid, __entry->prio)
282 );
283
284 /*
285  * Tracepoint for do_fork:
     *
     * Records comm and pid of both the parent and the child task.  In the
     * printed line the parent's fields appear under the unprefixed keys
     * "comm=" and "pid=", the child's under "child_comm=" and "child_pid=".
286  */
287 TRACE_EVENT(sched_process_fork,
288
289         TP_PROTO(struct task_struct *parent, struct task_struct *child),
290
291         TP_ARGS(parent, child),
292
293         TP_STRUCT__entry(
294                 __array(        char,   parent_comm,    TASK_COMM_LEN   )
295                 __field(        pid_t,  parent_pid                      )
296                 __array(        char,   child_comm,     TASK_COMM_LEN   )
297                 __field(        pid_t,  child_pid                       )
298         ),
299
300         TP_fast_assign(
301                 memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
302                 __entry->parent_pid     = parent->pid;
303                 memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
304                 __entry->child_pid      = child->pid;
305         ),
306
307         TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
308                 __entry->parent_comm, __entry->parent_pid,
309                 __entry->child_comm, __entry->child_pid)
310 );
311
312 /*
313  * Tracepoint for exec:
     *
     * Records bprm->filename, the pid of task p and the caller-supplied
     * old_pid.  The filename is declared with __string(), filled in with
     * __assign_str() and read back with __get_str() rather than being a
     * fixed-size __array like the comm fields of the other events here.
314  */
315 TRACE_EVENT(sched_process_exec,
316
317         TP_PROTO(struct task_struct *p, pid_t old_pid,
318                  struct linux_binprm *bprm),
319
320         TP_ARGS(p, old_pid, bprm),
321
322         TP_STRUCT__entry(
323                 __string(       filename,       bprm->filename  )
324                 __field(        pid_t,          pid             )
325                 __field(        pid_t,          old_pid         )
326         ),
327
328         TP_fast_assign(
329                 __assign_str(filename, bprm->filename);
330                 __entry->pid            = p->pid;
331                 __entry->old_pid        = old_pid;
332         ),
333
334         TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename),
335                   __entry->pid, __entry->old_pid)
336 );
337
338
339 #ifdef CONFIG_SCHEDSTATS
    /* With CONFIG_SCHEDSTATS the *_SCHEDSTAT wrappers are plain aliases of the regular event macros. */
340 #define DEFINE_EVENT_SCHEDSTAT DEFINE_EVENT
341 #define DECLARE_EVENT_CLASS_SCHEDSTAT DECLARE_EVENT_CLASS
342 #else
    /* Without it they map to the *_NOP variants of those macros instead. */
343 #define DEFINE_EVENT_SCHEDSTAT DEFINE_EVENT_NOP
344 #define DECLARE_EVENT_CLASS_SCHEDSTAT DECLARE_EVENT_CLASS_NOP
345 #endif
346
347 /*
348  * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
349  *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
     *
     * Event class reused by sched_stat_wait, sched_stat_sleep,
     * sched_stat_iowait and sched_stat_blocked.  It is declared through
     * DECLARE_EVENT_CLASS_SCHEDSTAT, so which underlying macro is used depends
     * on CONFIG_SCHEDSTATS.  Records comm, pid and a u64 delay that is printed
     * with an "[ns]" suffix.
350  */
351 DECLARE_EVENT_CLASS_SCHEDSTAT(sched_stat_template,
352
353         TP_PROTO(struct task_struct *tsk, u64 delay),
354
            /*
             * NOTE(review): __perf_task()/__perf_count() are not defined in this
             * file; presumably they tell perf which task and which count value
             * the event carries -- confirm.
             */
355         TP_ARGS(__perf_task(tsk), __perf_count(delay)),
356
357         TP_STRUCT__entry(
358                 __array( char,  comm,   TASK_COMM_LEN   )
359                 __field( pid_t, pid                     )
360                 __field( u64,   delay                   )
361         ),
362
363         TP_fast_assign(
364                 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
365                 __entry->pid    = tsk->pid;
366                 __entry->delay  = delay;
367         ),
368
369         TP_printk("comm=%s pid=%d delay=%Lu [ns]",
370                         __entry->comm, __entry->pid,
371                         (unsigned long long)__entry->delay)
372 );
373
374 /*
375  * Tracepoint for accounting wait time (time the task is runnable
376  * but not actually running due to scheduler contention).
     *
     * Instance of the sched_stat_template class (comm, pid, delay), defined
     * through DEFINE_EVENT_SCHEDSTAT.
377  */
378 DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_wait,
379              TP_PROTO(struct task_struct *tsk, u64 delay),
380              TP_ARGS(tsk, delay));
381
382 /*
383  * Tracepoint for accounting sleep time (time the task is not runnable,
384  * including iowait, see below).
     *
     * Instance of the sched_stat_template class (comm, pid, delay), defined
     * through DEFINE_EVENT_SCHEDSTAT.
385  */
386 DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_sleep,
387              TP_PROTO(struct task_struct *tsk, u64 delay),
388              TP_ARGS(tsk, delay));
389
390 /*
391  * Tracepoint for accounting iowait time (time the task is not runnable
392  * due to waiting on IO to complete).
     *
     * Instance of the sched_stat_template class (comm, pid, delay), defined
     * through DEFINE_EVENT_SCHEDSTAT.
393  */
394 DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_iowait,
395              TP_PROTO(struct task_struct *tsk, u64 delay),
396              TP_ARGS(tsk, delay));
397
398 /*
399  * Tracepoint for accounting blocked time (time the task is in uninterruptible).
     *
     * Instance of the sched_stat_template class (comm, pid, delay), defined
     * through DEFINE_EVENT_SCHEDSTAT.
400  */
401 DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_blocked,
402              TP_PROTO(struct task_struct *tsk, u64 delay),
403              TP_ARGS(tsk, delay));
404
405 /*
406  * Tracepoint for accounting runtime (time the task is executing
407  * on a CPU).
     *
     * Event class behind the sched_stat_runtime event.  Unlike the other
     * sched_stat_* events in this file it uses plain DECLARE_EVENT_CLASS, not
     * the _SCHEDSTAT wrapper, so it does not depend on CONFIG_SCHEDSTATS.
     * Records comm, pid, runtime and vruntime; both u64 values are printed
     * with an "[ns]" suffix.
408  */
409 DECLARE_EVENT_CLASS(sched_stat_runtime,
410
411         TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
412
            /* NOTE(review): only runtime is wrapped in __perf_count(); presumably that makes it the perf count value -- confirm. */
413         TP_ARGS(tsk, __perf_count(runtime), vruntime),
414
415         TP_STRUCT__entry(
416                 __array( char,  comm,   TASK_COMM_LEN   )
417                 __field( pid_t, pid                     )
418                 __field( u64,   runtime                 )
419                 __field( u64,   vruntime                        )
420         ),
421
422         TP_fast_assign(
423                 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
424                 __entry->pid            = tsk->pid;
425                 __entry->runtime        = runtime;
426                 __entry->vruntime       = vruntime;
427         ),
428
429         TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
430                         __entry->comm, __entry->pid,
431                         (unsigned long long)__entry->runtime,
432                         (unsigned long long)__entry->vruntime)
433 );
434
    /* The one event of the sched_stat_runtime class; event and class share the same name. */
435 DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
436              TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
437              TP_ARGS(tsk, runtime, vruntime));
438
439 /*
440  * Tracepoint for showing priority inheritance modifying a tasks
441  * priority.
     *
     * Records comm and pid of tsk, its current prio as oldprio, and a newprio
     * that is computed inside the tracepoint from tsk->normal_prio and, when
     * pi_task is non-NULL, pi_task->prio.
442  */
443 TRACE_EVENT(sched_pi_setprio,
444
445         TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),
446
447         TP_ARGS(tsk, pi_task),
448
449         TP_STRUCT__entry(
450                 __array( char,  comm,   TASK_COMM_LEN   )
451                 __field( pid_t, pid                     )
452                 __field( int,   oldprio                 )
453                 __field( int,   newprio                 )
454         ),
455
456         TP_fast_assign(
457                 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
458                 __entry->pid            = tsk->pid;
459                 __entry->oldprio        = tsk->prio;
                    /*
                     * With a pi_task: the smaller of tsk->normal_prio and
                     * pi_task->prio.  Without one: tsk->normal_prio.
                     */
460                 __entry->newprio        = pi_task ?
461                                 min(tsk->normal_prio, pi_task->prio) :
462                                 tsk->normal_prio;
463                 /* XXX SCHED_DEADLINE bits missing */
464         ),
465
466         TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
467                         __entry->comm, __entry->pid,
468                         __entry->oldprio, __entry->newprio)
469 );
470
471 #ifdef CONFIG_DETECT_HUNG_TASK
    /*
     * Only defined when CONFIG_DETECT_HUNG_TASK is set.  Records comm and pid
     * of the task tsk and prints them as "comm=%s pid=%d".
     */
472 TRACE_EVENT(sched_process_hang,
473         TP_PROTO(struct task_struct *tsk),
474         TP_ARGS(tsk),
475
476         TP_STRUCT__entry(
477                 __array( char,  comm,   TASK_COMM_LEN   )
478                 __field( pid_t, pid                     )
479         ),
480
481         TP_fast_assign(
482                 memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
483                 __entry->pid = tsk->pid;
484         ),
485
486         TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
487 );
488 #endif /* CONFIG_DETECT_HUNG_TASK */
489
    /*
     * Event class reused by the sched_move_numa and sched_stick_numa events.
     * Records pid, tgid and ngid of tsk (via task_pid_nr(), task_tgid_nr() and
     * task_numa_group_id()), the caller-supplied source and destination CPUs,
     * and the node of each CPU as returned by cpu_to_node().
     */
490 DECLARE_EVENT_CLASS(sched_move_task_template,
491
492         TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
493
494         TP_ARGS(tsk, src_cpu, dst_cpu),
495
496         TP_STRUCT__entry(
497                 __field( pid_t, pid                     )
498                 __field( pid_t, tgid                    )
499                 __field( pid_t, ngid                    )
500                 __field( int,   src_cpu                 )
501                 __field( int,   src_nid                 )
502                 __field( int,   dst_cpu                 )
503                 __field( int,   dst_nid                 )
504         ),
505
506         TP_fast_assign(
507                 __entry->pid            = task_pid_nr(tsk);
508                 __entry->tgid           = task_tgid_nr(tsk);
509                 __entry->ngid           = task_numa_group_id(tsk);
510                 __entry->src_cpu        = src_cpu;
511                 __entry->src_nid        = cpu_to_node(src_cpu);
512                 __entry->dst_cpu        = dst_cpu;
513                 __entry->dst_nid        = cpu_to_node(dst_cpu);
514         ),
515
516         TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d",
517                         __entry->pid, __entry->tgid, __entry->ngid,
518                         __entry->src_cpu, __entry->src_nid,
519                         __entry->dst_cpu, __entry->dst_nid)
520 );
521
522 /*
523  * Tracks migration of tasks from one runqueue to another. Can be used to
524  * detect if automatic NUMA balancing is bouncing between nodes
     *
     * Instance of the sched_move_task_template class: same arguments, record
     * layout and printed format.
525  */
526 DEFINE_EVENT(sched_move_task_template, sched_move_numa,
527         TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
528
529         TP_ARGS(tsk, src_cpu, dst_cpu)
530 );
531
    /* Second instance of the sched_move_task_template class; same arguments, record layout and printed format. */
532 DEFINE_EVENT(sched_move_task_template, sched_stick_numa,
533         TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
534
535         TP_ARGS(tsk, src_cpu, dst_cpu)
536 );
537
    /*
     * Records the same per-task data as sched_move_task_template, but for two
     * tasks at once: pid, tgid, ngid, CPU and node for src_tsk/src_cpu and for
     * dst_tsk/dst_cpu.  Both task pointers are dereferenced through the
     * task_*() helpers in TP_fast_assign.
     */
538 TRACE_EVENT(sched_swap_numa,
539
540         TP_PROTO(struct task_struct *src_tsk, int src_cpu,
541                  struct task_struct *dst_tsk, int dst_cpu),
542
543         TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu),
544
545         TP_STRUCT__entry(
546                 __field( pid_t, src_pid                 )
547                 __field( pid_t, src_tgid                )
548                 __field( pid_t, src_ngid                )
549                 __field( int,   src_cpu                 )
550                 __field( int,   src_nid                 )
551                 __field( pid_t, dst_pid                 )
552                 __field( pid_t, dst_tgid                )
553                 __field( pid_t, dst_ngid                )
554                 __field( int,   dst_cpu                 )
555                 __field( int,   dst_nid                 )
556         ),
557
558         TP_fast_assign(
559                 __entry->src_pid        = task_pid_nr(src_tsk);
560                 __entry->src_tgid       = task_tgid_nr(src_tsk);
561                 __entry->src_ngid       = task_numa_group_id(src_tsk);
562                 __entry->src_cpu        = src_cpu;
563                 __entry->src_nid        = cpu_to_node(src_cpu);
564                 __entry->dst_pid        = task_pid_nr(dst_tsk);
565                 __entry->dst_tgid       = task_tgid_nr(dst_tsk);
566                 __entry->dst_ngid       = task_numa_group_id(dst_tsk);
567                 __entry->dst_cpu        = dst_cpu;
568                 __entry->dst_nid        = cpu_to_node(dst_cpu);
569         ),
570
571         TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
572                         __entry->src_pid, __entry->src_tgid, __entry->src_ngid,
573                         __entry->src_cpu, __entry->src_nid,
574                         __entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
575                         __entry->dst_cpu, __entry->dst_nid)
576 );
577
578 /*
579  * Tracepoint for waking a polling cpu without an IPI.
     *
     * Records the single integer argument cpu and prints it as "cpu=%d".
580  */
581 TRACE_EVENT(sched_wake_idle_without_ipi,
582
583         TP_PROTO(int cpu),
584
585         TP_ARGS(cpu),
586
587         TP_STRUCT__entry(
588                 __field(        int,    cpu     )
589         ),
590
591         TP_fast_assign(
592                 __entry->cpu    = cpu;
593         ),
594
595         TP_printk("cpu=%d", __entry->cpu)
596 );
597
598 /*
599  * Following tracepoints are not exported in tracefs and provide hooking
600  * mechanisms only for testing and debugging purposes.
601  *
602  * Postfixed with _tp to make them easily identifiable in the code.
     *
     * They are declared with DECLARE_TRACE, so this file gives them a
     * prototype and argument list only -- no entry layout and no print format.
603  */
    /* Takes a struct cfs_rq pointer. */
604 DECLARE_TRACE(pelt_cfs_tp,
605         TP_PROTO(struct cfs_rq *cfs_rq),
606         TP_ARGS(cfs_rq));
607
    /* DECLARE_TRACE tracepoint taking a struct rq pointer; no entry layout or print format is defined for it here. */
608 DECLARE_TRACE(pelt_rt_tp,
609         TP_PROTO(struct rq *rq),
610         TP_ARGS(rq));
611
    /* DECLARE_TRACE tracepoint taking a struct rq pointer; no entry layout or print format is defined for it here. */
612 DECLARE_TRACE(pelt_dl_tp,
613         TP_PROTO(struct rq *rq),
614         TP_ARGS(rq));
615
    /* DECLARE_TRACE tracepoint taking a struct rq pointer; no entry layout or print format is defined for it here. */
616 DECLARE_TRACE(pelt_irq_tp,
617         TP_PROTO(struct rq *rq),
618         TP_ARGS(rq));
619
    /* DECLARE_TRACE tracepoint taking a struct sched_entity pointer; no entry layout or print format is defined for it here. */
620 DECLARE_TRACE(pelt_se_tp,
621         TP_PROTO(struct sched_entity *se),
622         TP_ARGS(se));
623
    /* DECLARE_TRACE tracepoint taking a struct root_domain pointer and a bool flag; no entry layout or print format is defined for it here. */
624 DECLARE_TRACE(sched_overutilized_tp,
625         TP_PROTO(struct root_domain *rd, bool overutilized),
626         TP_ARGS(rd, overutilized));
627
628 #endif /* _TRACE_SCHED_H */
629
630 /* This part must be outside protection */
631 #include <trace/define_trace.h>