psi: Optimize task switch inside shared cgroups
[linux-2.6-microblaze.git] / kernel / sched / psi.c
index 0fe6ff6..ee3c5b4 100644 (file)
@@ -219,17 +219,17 @@ static bool test_state(unsigned int *tasks, enum psi_states state)
 {
        switch (state) {
        case PSI_IO_SOME:
-               return tasks[NR_IOWAIT];
+               return unlikely(tasks[NR_IOWAIT]);
        case PSI_IO_FULL:
-               return tasks[NR_IOWAIT] && !tasks[NR_RUNNING];
+               return unlikely(tasks[NR_IOWAIT] && !tasks[NR_RUNNING]);
        case PSI_MEM_SOME:
-               return tasks[NR_MEMSTALL];
+               return unlikely(tasks[NR_MEMSTALL]);
        case PSI_MEM_FULL:
-               return tasks[NR_MEMSTALL] && !tasks[NR_RUNNING];
+               return unlikely(tasks[NR_MEMSTALL] && !tasks[NR_RUNNING]);
        case PSI_CPU_SOME:
-               return tasks[NR_RUNNING] > tasks[NR_ONCPU];
+               return unlikely(tasks[NR_RUNNING] > tasks[NR_ONCPU]);
        case PSI_CPU_FULL:
-               return tasks[NR_RUNNING] && !tasks[NR_ONCPU];
+               return unlikely(tasks[NR_RUNNING] && !tasks[NR_ONCPU]);
        case PSI_NONIDLE:
                return tasks[NR_IOWAIT] || tasks[NR_MEMSTALL] ||
                        tasks[NR_RUNNING];
@@ -729,7 +729,7 @@ static void psi_group_change(struct psi_group *group, int cpu,
         * task in a cgroup is in_memstall, the corresponding groupc
         * on that cpu is in PSI_MEM_FULL state.
         */
-       if (groupc->tasks[NR_ONCPU] && cpu_curr(cpu)->in_memstall)
+       if (unlikely(groupc->tasks[NR_ONCPU] && cpu_curr(cpu)->in_memstall))
                state_mask |= (1 << PSI_MEM_FULL);
 
        groupc->state_mask = state_mask;
@@ -840,20 +840,35 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
                }
        }
 
-       /*
-        * If this is a voluntary sleep, dequeue will have taken care
-        * of the outgoing TSK_ONCPU alongside TSK_RUNNING already. We
-        * only need to deal with it during preemption.
-        */
-       if (sleep)
-               return;
-
        if (prev->pid) {
-               psi_flags_change(prev, TSK_ONCPU, 0);
+               int clear = TSK_ONCPU, set = 0;
+
+               /*
+                * When we're going to sleep, psi_dequeue() lets us handle
+                * TSK_RUNNING and TSK_IOWAIT here, where we can combine them
+                * with TSK_ONCPU and save walking common ancestors twice.
+                */
+               if (sleep) {
+                       clear |= TSK_RUNNING;
+                       if (prev->in_iowait)
+                               set |= TSK_IOWAIT;
+               }
+
+               psi_flags_change(prev, clear, set);
 
                iter = NULL;
                while ((group = iterate_groups(prev, &iter)) && group != common)
-                       psi_group_change(group, cpu, TSK_ONCPU, 0, true);
+                       psi_group_change(group, cpu, clear, set, true);
+
+               /*
+                * TSK_ONCPU is handled up to the common ancestor. If we're tasked
+                * with dequeuing too, finish that for the rest of the hierarchy.
+                */
+               if (sleep) {
+                       clear &= ~TSK_ONCPU;
+                       for (; group; group = iterate_groups(prev, &iter))
+                               psi_group_change(group, cpu, clear, set, true);
+               }
        }
 }