fs/proc/uptime.c: Fix idle time reporting in /proc/uptime
author Josh Don <joshdon@google.com>
Fri, 27 Aug 2021 16:54:38 +0000 (09:54 -0700)
committer Peter Zijlstra <peterz@infradead.org>
Tue, 5 Oct 2021 13:51:35 +0000 (15:51 +0200)
/proc/uptime reports idle time by reading the CPUTIME_IDLE field from
the per-cpu kcpustats. However, on NO_HZ systems, idle time is not
continually updated on idle CPUs, so the reported value appears
incorrectly small.

/proc/stat performs an accounting update when reading idle time; we
can use the same approach for uptime.

With this patch, /proc/stat and /proc/uptime now agree on idle time.
Additionally, the following shows idle time tick up consistently on an
idle machine:

  (while true; do cat /proc/uptime; sleep 1; done) | awk '{print $2-prev; prev=$2}'

Reported-by: Luigi Rizzo <lrizzo@google.com>
Signed-off-by: Josh Don <joshdon@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lkml.kernel.org/r/20210827165438.3280779-1-joshdon@google.com
fs/proc/stat.c
fs/proc/uptime.c
include/linux/kernel_stat.h

index 6561a06..4fb8729 100644 (file)
@@ -24,7 +24,7 @@
 
 #ifdef arch_idle_time
 
-static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
+u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
 {
        u64 idle;
 
@@ -46,7 +46,7 @@ static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)
 
 #else
 
-static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
+u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
 {
        u64 idle, idle_usecs = -1ULL;
 
index 5a1b228..deb99bc 100644 (file)
@@ -12,18 +12,22 @@ static int uptime_proc_show(struct seq_file *m, void *v)
 {
        struct timespec64 uptime;
        struct timespec64 idle;
-       u64 nsec;
+       u64 idle_nsec;
        u32 rem;
        int i;
 
-       nsec = 0;
-       for_each_possible_cpu(i)
-               nsec += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
+       idle_nsec = 0;
+       for_each_possible_cpu(i) {
+               struct kernel_cpustat kcs;
+
+               kcpustat_cpu_fetch(&kcs, i);
+               idle_nsec += get_idle_time(&kcs, i);
+       }
 
        ktime_get_boottime_ts64(&uptime);
        timens_add_boottime(&uptime);
 
-       idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
+       idle.tv_sec = div_u64_rem(idle_nsec, NSEC_PER_SEC, &rem);
        idle.tv_nsec = rem;
        seq_printf(m, "%lu.%02lu %lu.%02lu\n",
                        (unsigned long) uptime.tv_sec,
index 44ae1a7..69ae6b2 100644 (file)
@@ -102,6 +102,7 @@ extern void account_system_index_time(struct task_struct *, u64,
                                      enum cpu_usage_stat);
 extern void account_steal_time(u64);
 extern void account_idle_time(u64);
+extern u64 get_idle_time(struct kernel_cpustat *kcs, int cpu);
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 static inline void account_process_tick(struct task_struct *tsk, int user)