1 // SPDX-License-Identifier: GPL-2.0-only
3 * Context tracking: Probe on high level context boundaries such as kernel
4 * and userspace. This includes syscalls and exceptions entry/exit.
6 * This is used by RCU to remove its dependency on the timer tick while a CPU
9 * Started by Frederic Weisbecker:
11 * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
13 * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
14 * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
18 #include <linux/context_tracking.h>
19 #include <linux/rcupdate.h>
20 #include <linux/sched.h>
21 #include <linux/hardirq.h>
22 #include <linux/export.h>
23 #include <linux/kprobes.h>
26 DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
27 #ifdef CONFIG_CONTEXT_TRACKING_IDLE
28 .dynticks_nesting = 1,
29 .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
30 .dynticks = ATOMIC_INIT(1),
33 EXPORT_SYMBOL_GPL(context_tracking);
35 #ifdef CONFIG_CONTEXT_TRACKING_IDLE
36 noinstr void ct_idle_enter(void)
40 EXPORT_SYMBOL_GPL(ct_idle_enter);
42 void ct_idle_exit(void)
46 EXPORT_SYMBOL_GPL(ct_idle_exit);
49 * ct_irq_enter - inform RCU that current CPU is entering irq away from idle
51 * Enter an interrupt handler, which might possibly result in exiting
52 * idle mode, in other words, entering the mode in which read-side critical
53 * sections can occur. The caller must have disabled interrupts.
55 * Note that the Linux kernel is fully capable of entering an interrupt
56 * handler that it never exits, for example when doing upcalls to user mode!
57 * This code assumes that the idle loop never does upcalls to user mode.
58 * If your architecture's idle loop does do upcalls to user mode (or does
59 * anything else that results in unbalanced calls to the irq_enter() and
60 * irq_exit() functions), RCU will give you what you deserve, good and hard.
61 * But very infrequently and irreproducibly.
63 * Use things like work queues to work around this limitation.
65 * You have been warned.
67 * If you add or remove a call to ct_irq_enter(), be sure to test with
68 * CONFIG_RCU_EQS_DEBUG=y.
70 noinstr void ct_irq_enter(void)
72 lockdep_assert_irqs_disabled();
77 * ct_irq_exit - inform RCU that current CPU is exiting irq towards idle
79 * Exit from an interrupt handler, which might possibly result in entering
80 * idle mode, in other words, leaving the mode in which read-side critical
81 * sections can occur. The caller must have disabled interrupts.
83 * This code assumes that the idle loop never does anything that might
84 * result in unbalanced calls to irq_enter() and irq_exit(). If your
85 * architecture's idle loop violates this assumption, RCU will give you what
86 * you deserve, good and hard. But very infrequently and irreproducibly.
88 * Use things like work queues to work around this limitation.
90 * You have been warned.
92 * If you add or remove a call to ct_irq_exit(), be sure to test with
93 * CONFIG_RCU_EQS_DEBUG=y.
95 noinstr void ct_irq_exit(void)
97 lockdep_assert_irqs_disabled();
102 * Wrapper for ct_irq_enter() where interrupts are enabled.
104 * If you add or remove a call to ct_irq_enter_irqson(), be sure to test
105 * with CONFIG_RCU_EQS_DEBUG=y.
107 void ct_irq_enter_irqson(void)
111 local_irq_save(flags);
113 local_irq_restore(flags);
117 * Wrapper for ct_irq_exit() where interrupts are enabled.
119 * If you add or remove a call to ct_irq_exit_irqson(), be sure to test
120 * with CONFIG_RCU_EQS_DEBUG=y.
122 void ct_irq_exit_irqson(void)
126 local_irq_save(flags);
128 local_irq_restore(flags);
131 noinstr void ct_nmi_enter(void)
136 noinstr void ct_nmi_exit(void)
140 #endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */
142 #ifdef CONFIG_CONTEXT_TRACKING_USER
144 #define CREATE_TRACE_POINTS
145 #include <trace/events/context_tracking.h>
147 DEFINE_STATIC_KEY_FALSE(context_tracking_key);
148 EXPORT_SYMBOL_GPL(context_tracking_key);
150 static noinstr bool context_tracking_recursion_enter(void)
154 recursion = __this_cpu_inc_return(context_tracking.recursion);
158 WARN_ONCE((recursion < 1), "Invalid context tracking recursion value %d\n", recursion);
159 __this_cpu_dec(context_tracking.recursion);
164 static __always_inline void context_tracking_recursion_exit(void)
166 __this_cpu_dec(context_tracking.recursion);
170 * __ct_user_enter - Inform the context tracking that the CPU is going
171 * to enter user or guest space mode.
173 * This function must be called right before we switch from the kernel
174 * to user or guest space, when it's guaranteed the remaining kernel
175 * instructions to execute won't use any RCU read side critical section
176 * because this function sets RCU in extended quiescent state.
178 void noinstr __ct_user_enter(enum ctx_state state)
180 lockdep_assert_irqs_disabled();
182 /* Kernel threads aren't supposed to go to userspace */
183 WARN_ON_ONCE(!current->mm);
185 if (!context_tracking_recursion_enter())
188 if ( __this_cpu_read(context_tracking.state) != state) {
189 if (__this_cpu_read(context_tracking.active)) {
191 * At this stage, only low level arch entry code remains and
192 * then we'll run in userspace. We can assume there won't be
193 * any RCU read-side critical section until the next call to
194 * user_exit() or ct_irq_enter(). Let's remove RCU's dependency
197 if (state == CONTEXT_USER) {
198 instrumentation_begin();
200 vtime_user_enter(current);
201 instrumentation_end();
204 * Other than generic entry implementation, we may be past the last
205 * rescheduling opportunity in the entry code. Trigger a self IPI
206 * that will fire and reschedule once we resume in user/guest mode.
208 rcu_irq_work_resched();
212 * Even if context tracking is disabled on this CPU, because it's outside
213 * the full dynticks mask for example, we still have to keep track of the
214 * context transitions and states to prevent inconsistency on those of
216 * If a task triggers an exception in userspace, sleeps in the exception
217 * handler and then migrates to another CPU, that new CPU must know where
218 * the exception returns by the time we call exception_exit().
219 * This information can only be provided by the previous CPU when it called
221 * OTOH we can spare the calls to vtime and RCU when context_tracking.active
222 * is false because we know that CPU is not tickless.
224 __this_cpu_write(context_tracking.state, state);
226 context_tracking_recursion_exit();
228 EXPORT_SYMBOL_GPL(__ct_user_enter);
232 * This function should be noinstr but the below local_irq_restore() is
233 * unsafe because it involves illegal RCU uses through tracing and lockdep.
234 * This is unlikely to be fixed as this function is obsolete. The preferred
235 * way is to call __ct_user_enter() through user_enter_irqoff()
236 * or context_tracking_guest_enter(). It should be the arch entry code
237 * responsibility to call into context tracking with IRQs disabled.
239 void ct_user_enter(enum ctx_state state)
244 * Some contexts may involve an exception occurring in an irq,
245 * leading to that nesting:
246 * ct_irq_enter() rcu_user_exit() rcu_user_enter() ct_irq_exit()
247 * This would mess up the dynticks_nesting count though. And rcu_irq_*()
248 * helpers are enough to protect RCU uses inside the exception. So
249 * just return immediately if we detect we are in an IRQ.
254 local_irq_save(flags);
255 __ct_user_enter(state);
256 local_irq_restore(flags);
258 NOKPROBE_SYMBOL(ct_user_enter);
259 EXPORT_SYMBOL_GPL(ct_user_enter);
262 * user_enter_callable() - Unfortunate ASM callable version of user_enter() for
263 * archs that didn't manage to check the context tracking
264 * static key from low level code.
266 * This OBSOLETE function should be noinstr but it unsafely calls
267 * local_irq_restore(), involving illegal RCU uses through tracing and lockdep.
268 * This is unlikely to be fixed as this function is obsolete. The preferred
269 * way is to call user_enter_irqoff(). It should be the arch entry code
270 * responsibility to call into context tracking with IRQs disabled.
272 void user_enter_callable(void)
276 NOKPROBE_SYMBOL(user_enter_callable);
279 * __ct_user_exit - Inform the context tracking that the CPU is
280 * exiting user or guest mode and entering the kernel.
282 * This function must be called after we entered the kernel from user or
283 * guest space before any use of RCU read side critical section. This
284 * potentially includes any high level kernel code like syscalls, exceptions,
285 * signal handling, etc...
287 * This call supports re-entrancy. This way it can be called from any exception
288 * handler without needing to know if we came from userspace or not.
290 void noinstr __ct_user_exit(enum ctx_state state)
292 if (!context_tracking_recursion_enter())
295 if (__this_cpu_read(context_tracking.state) == state) {
296 if (__this_cpu_read(context_tracking.active)) {
298 * We are going to run code that may use RCU. Inform
299 * RCU core about that (ie: we may need the tick again).
302 if (state == CONTEXT_USER) {
303 instrumentation_begin();
304 vtime_user_exit(current);
306 instrumentation_end();
309 __this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
311 context_tracking_recursion_exit();
313 EXPORT_SYMBOL_GPL(__ct_user_exit);
317 * This function should be noinstr but the below local_irq_save() is
318 * unsafe because it involves illegal RCU uses through tracing and lockdep.
319 * This is unlikely to be fixed as this function is obsolete. The preferred
320 * way is to call __ct_user_exit() through user_exit_irqoff()
321 * or context_tracking_guest_exit(). It should be the arch entry code
322 * responsibility to call into context tracking with IRQs disabled.
324 void ct_user_exit(enum ctx_state state)
331 local_irq_save(flags);
332 __ct_user_exit(state);
333 local_irq_restore(flags);
335 NOKPROBE_SYMBOL(ct_user_exit);
336 EXPORT_SYMBOL_GPL(ct_user_exit);
339 * user_exit_callable() - Unfortunate ASM callable version of user_exit() for
340 * archs that didn't manage to check the context tracking
341 * static key from low level code.
343 * This OBSOLETE function should be noinstr but it unsafely calls local_irq_save(),
344 * involving illegal RCU uses through tracing and lockdep. This is unlikely
345 * to be fixed as this function is obsolete. The preferred way is to call
346 * user_exit_irqoff(). It should be the arch entry code responsibility to
347 * call into context tracking with IRQs disabled.
349 void user_exit_callable(void)
353 NOKPROBE_SYMBOL(user_exit_callable);
355 void __init ct_cpu_track_user(int cpu)
357 static __initdata bool initialized = false;
359 if (!per_cpu(context_tracking.active, cpu)) {
360 per_cpu(context_tracking.active, cpu) = true;
361 static_branch_inc(&context_tracking_key);
367 #ifdef CONFIG_HAVE_TIF_NOHZ
369 * Set TIF_NOHZ to init/0 and let it propagate to all tasks through fork
370 * This assumes that init is the only task at this early boot stage.
372 set_tsk_thread_flag(&init_task, TIF_NOHZ);
374 WARN_ON_ONCE(!tasklist_empty());
379 #ifdef CONFIG_CONTEXT_TRACKING_USER_FORCE
380 void __init context_tracking_init(void)
384 for_each_possible_cpu(cpu)
385 ct_cpu_track_user(cpu);
389 #endif /* #ifdef CONFIG_CONTEXT_TRACKING_USER */