Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
author	Linus Torvalds <torvalds@linux-foundation.org>
Tue, 28 Jan 2020 18:07:09 +0000 (10:07 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
Tue, 28 Jan 2020 18:07:09 +0000 (10:07 -0800)
Pull scheduler updates from Ingo Molnar:
 "These were the main changes in this cycle:

   - More -rt motivated separation of CONFIG_PREEMPT and
     CONFIG_PREEMPTION.

   - Add more low level scheduling topology sanity checks and warnings
     to filter out nonsensical topologies that break scheduling.

   - Extend uclamp constraints to influence wakeup CPU placement.

   - Make the RT scheduler more aware of asymmetric topologies and CPU
     capacities, via uclamp metrics, if CONFIG_UCLAMP_TASK=y.

   - Make idle CPU selection more consistent.

   - Various fixes, smaller cleanups, updates and enhancements - please
     see the git log for details"
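
The PREEMPT/PREEMPTION split called out in the first item accounts for most of
the arch churn in the file list below: code that used to key off CONFIG_PREEMPT
now keys off CONFIG_PREEMPTION, which is selected by both PREEMPT and
PREEMPT_RT, while CONFIG_PREEMPT checks remain only where the distinction from
PREEMPT_RT matters (e.g. banner strings). A minimal sketch of the resulting
pattern, not taken from any one file:

    /* preemption paths: build for both PREEMPT and PREEMPT_RT */
    #ifdef CONFIG_PREEMPTION
            if (!preempt_count() && need_resched())
                    preempt_schedule_irq();
    #endif

    /* reporting: the exact preemption model still matters here */
    #if defined(CONFIG_PREEMPT)
    #define S_PREEMPT " PREEMPT"
    #elif defined(CONFIG_PREEMPT_RT)
    #define S_PREEMPT " PREEMPT_RT"
    #else
    #define S_PREEMPT ""
    #endif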

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (58 commits)
  sched/fair: Define sched_idle_cpu() only for SMP configurations
  sched/topology: Assert non-NUMA topology masks don't (partially) overlap
  idle: fix spelling mistake "iterrupts" -> "interrupts"
  sched/fair: Remove redundant call to cpufreq_update_util()
  sched/psi: create /proc/pressure and /proc/pressure/{io|memory|cpu} only when psi enabled
  sched/fair: Fix sgc->{min,max}_capacity calculation for SD_OVERLAP
  sched/fair: calculate delta runnable load only when it's needed
  sched/cputime: move rq parameter in irqtime_account_process_tick
  stop_machine: Make stop_cpus() static
  sched/debug: Reset watchdog on all CPUs while processing sysrq-t
  sched/core: Fix size of rq::uclamp initialization
  sched/uclamp: Fix a bug in propagating uclamp value in new cgroups
  sched/fair: Load balance aggressively for SCHED_IDLE CPUs
  sched/fair : Improve update_sd_pick_busiest for spare capacity case
  watchdog: Remove soft_lockup_hrtimer_cnt and related code
  sched/rt: Make RT capacity-aware
  sched/fair: Make EAS wakeup placement consider uclamp restrictions
  sched/fair: Make task_fits_capacity() consider uclamp restrictions
  sched/uclamp: Rename uclamp_util_with() into uclamp_rq_util_with()
  sched/uclamp: Make uclamp util helpers use and return UL values
  ...

72 files changed:
arch/arc/kernel/entry.S
arch/arm/include/asm/switch_to.h
arch/arm/kernel/entry-armv.S
arch/arm/kernel/traps.c
arch/arm/mm/cache-v7.S
arch/arm/mm/cache-v7m.S
arch/arm64/Kconfig
arch/arm64/crypto/sha256-glue.c
arch/arm64/include/asm/assembler.h
arch/arm64/include/asm/preempt.h
arch/arm64/kernel/entry.S
arch/arm64/kernel/traps.c
arch/c6x/kernel/entry.S
arch/csky/kernel/entry.S
arch/h8300/kernel/entry.S
arch/hexagon/kernel/vm_entry.S
arch/ia64/kernel/entry.S
arch/ia64/kernel/kprobes.c
arch/microblaze/kernel/entry.S
arch/mips/include/asm/asmmacro.h
arch/mips/kernel/entry.S
arch/nds32/Kconfig
arch/nds32/kernel/ex-exit.S
arch/nios2/kernel/entry.S
arch/parisc/Kconfig
arch/parisc/kernel/entry.S
arch/powerpc/Kconfig
arch/powerpc/kernel/entry_32.S
arch/powerpc/kernel/entry_64.S
arch/riscv/kernel/entry.S
arch/s390/Kconfig
arch/s390/include/asm/preempt.h
arch/s390/kernel/dumpstack.c
arch/s390/kernel/entry.S
arch/sh/Kconfig
arch/sh/kernel/cpu/sh5/entry.S
arch/sh/kernel/entry-common.S
arch/sparc/Kconfig
arch/sparc/kernel/rtrap_64.S
arch/xtensa/kernel/entry.S
arch/xtensa/kernel/traps.c
drivers/xen/preempt.c
fs/btrfs/volumes.h
fs/stack.c
include/linux/fs.h
include/linux/genhd.h
include/linux/sched/cpufreq.h
include/linux/stop_machine.h
include/xen/xen-ops.h
kernel/Kconfig.locks
kernel/cpu.c
kernel/sched/clock.c
kernel/sched/core.c
kernel/sched/cpufreq_schedutil.c
kernel/sched/cpupri.c
kernel/sched/cpupri.h
kernel/sched/cputime.c
kernel/sched/debug.c
kernel/sched/fair.c
kernel/sched/idle.c
kernel/sched/pelt.c
kernel/sched/psi.c
kernel/sched/rt.c
kernel/sched/sched.h
kernel/sched/topology.c
kernel/sched/wait_bit.c
kernel/stop_machine.c
kernel/workqueue.c
lib/Kconfig.debug
mm/memory.c
mm/slub.c
net/core/dev.c

index 72be012..1f6bb18 100644 (file)
@@ -337,11 +337,11 @@ resume_user_mode_begin:
 resume_kernel_mode:
 
        ; Disable Interrupts from this point on
-       ; CONFIG_PREEMPT: This is a must for preempt_schedule_irq()
-       ; !CONFIG_PREEMPT: To ensure restore_regs is intr safe
+       ; CONFIG_PREEMPTION: This is a must for preempt_schedule_irq()
+       ; !CONFIG_PREEMPTION: To ensure restore_regs is intr safe
        IRQ_DISABLE     r9
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 
        ; Can't preempt if preemption disabled
        GET_CURR_THR_INFO_FROM_SP   r10
index d3e937d..007d8fe 100644 (file)
@@ -10,7 +10,7 @@
  * to ensure that the maintenance completes in case we migrate to another
  * CPU.
  */
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) && defined(CONFIG_CPU_V7)
+#if defined(CONFIG_PREEMPTION) && defined(CONFIG_SMP) && defined(CONFIG_CPU_V7)
 #define __complete_pending_tlbi()      dsb(ish)
 #else
 #define __complete_pending_tlbi()
index 858d4e5..77f5483 100644 (file)
@@ -211,7 +211,7 @@ __irq_svc:
        svc_entry
        irq_handler
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        ldr     r8, [tsk, #TI_PREEMPT]          @ get preempt count
        ldr     r0, [tsk, #TI_FLAGS]            @ get flags
        teq     r8, #0                          @ if preempt count != 0
@@ -226,7 +226,7 @@ ENDPROC(__irq_svc)
 
        .ltorg
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 svc_preempt:
        mov     r8, lr
 1:     bl      preempt_schedule_irq            @ irq en/disable is done inside
index c053abd..abb7dd7 100644 (file)
@@ -248,6 +248,8 @@ void show_stack(struct task_struct *tsk, unsigned long *sp)
 
 #ifdef CONFIG_PREEMPT
 #define S_PREEMPT " PREEMPT"
+#elif defined(CONFIG_PREEMPT_RT)
+#define S_PREEMPT " PREEMPT_RT"
 #else
 #define S_PREEMPT ""
 #endif
index 0ee8fc4..dc8f152 100644 (file)
@@ -135,13 +135,13 @@ flush_levels:
        and     r1, r1, #7                      @ mask of the bits for current cache only
        cmp     r1, #2                          @ see what cache we have at this level
        blt     skip                            @ skip if no cache, or just i-cache
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        save_and_disable_irqs_notrace r9        @ make cssr&csidr read atomic
 #endif
        mcr     p15, 2, r10, c0, c0, 0          @ select current cache level in cssr
        isb                                     @ isb to sych the new cssr&csidr
        mrc     p15, 1, r1, c0, c0, 0           @ read the new csidr
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        restore_irqs_notrace r9
 #endif
        and     r2, r1, #7                      @ extract the length of the cache lines
index a0035c4..1bc3a0a 100644 (file)
@@ -183,13 +183,13 @@ flush_levels:
        and     r1, r1, #7                      @ mask of the bits for current cache only
        cmp     r1, #2                          @ see what cache we have at this level
        blt     skip                            @ skip if no cache, or just i-cache
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        save_and_disable_irqs_notrace r9        @ make cssr&csidr read atomic
 #endif
        write_csselr r10, r1                    @ set current cache level
        isb                                     @ isb to sych the new cssr&csidr
        read_ccsidr r1                          @ read the new csidr
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        restore_irqs_notrace r9
 #endif
        and     r2, r1, #7                      @ extract the length of the cache lines
index 0f4124f..d2cebf6 100644 (file)
@@ -34,32 +34,32 @@ config ARM64
        select ARCH_HAS_TEARDOWN_DMA_OPS if IOMMU_SUPPORT
        select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
        select ARCH_HAVE_NMI_SAFE_CMPXCHG
-       select ARCH_INLINE_READ_LOCK if !PREEMPT
-       select ARCH_INLINE_READ_LOCK_BH if !PREEMPT
-       select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPT
-       select ARCH_INLINE_READ_LOCK_IRQSAVE if !PREEMPT
-       select ARCH_INLINE_READ_UNLOCK if !PREEMPT
-       select ARCH_INLINE_READ_UNLOCK_BH if !PREEMPT
-       select ARCH_INLINE_READ_UNLOCK_IRQ if !PREEMPT
-       select ARCH_INLINE_READ_UNLOCK_IRQRESTORE if !PREEMPT
-       select ARCH_INLINE_WRITE_LOCK if !PREEMPT
-       select ARCH_INLINE_WRITE_LOCK_BH if !PREEMPT
-       select ARCH_INLINE_WRITE_LOCK_IRQ if !PREEMPT
-       select ARCH_INLINE_WRITE_LOCK_IRQSAVE if !PREEMPT
-       select ARCH_INLINE_WRITE_UNLOCK if !PREEMPT
-       select ARCH_INLINE_WRITE_UNLOCK_BH if !PREEMPT
-       select ARCH_INLINE_WRITE_UNLOCK_IRQ if !PREEMPT
-       select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPT
-       select ARCH_INLINE_SPIN_TRYLOCK if !PREEMPT
-       select ARCH_INLINE_SPIN_TRYLOCK_BH if !PREEMPT
-       select ARCH_INLINE_SPIN_LOCK if !PREEMPT
-       select ARCH_INLINE_SPIN_LOCK_BH if !PREEMPT
-       select ARCH_INLINE_SPIN_LOCK_IRQ if !PREEMPT
-       select ARCH_INLINE_SPIN_LOCK_IRQSAVE if !PREEMPT
-       select ARCH_INLINE_SPIN_UNLOCK if !PREEMPT
-       select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPT
-       select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPT
-       select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPT
+       select ARCH_INLINE_READ_LOCK if !PREEMPTION
+       select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION
+       select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPTION
+       select ARCH_INLINE_READ_LOCK_IRQSAVE if !PREEMPTION
+       select ARCH_INLINE_READ_UNLOCK if !PREEMPTION
+       select ARCH_INLINE_READ_UNLOCK_BH if !PREEMPTION
+       select ARCH_INLINE_READ_UNLOCK_IRQ if !PREEMPTION
+       select ARCH_INLINE_READ_UNLOCK_IRQRESTORE if !PREEMPTION
+       select ARCH_INLINE_WRITE_LOCK if !PREEMPTION
+       select ARCH_INLINE_WRITE_LOCK_BH if !PREEMPTION
+       select ARCH_INLINE_WRITE_LOCK_IRQ if !PREEMPTION
+       select ARCH_INLINE_WRITE_LOCK_IRQSAVE if !PREEMPTION
+       select ARCH_INLINE_WRITE_UNLOCK if !PREEMPTION
+       select ARCH_INLINE_WRITE_UNLOCK_BH if !PREEMPTION
+       select ARCH_INLINE_WRITE_UNLOCK_IRQ if !PREEMPTION
+       select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPTION
+       select ARCH_INLINE_SPIN_TRYLOCK if !PREEMPTION
+       select ARCH_INLINE_SPIN_TRYLOCK_BH if !PREEMPTION
+       select ARCH_INLINE_SPIN_LOCK if !PREEMPTION
+       select ARCH_INLINE_SPIN_LOCK_BH if !PREEMPTION
+       select ARCH_INLINE_SPIN_LOCK_IRQ if !PREEMPTION
+       select ARCH_INLINE_SPIN_LOCK_IRQSAVE if !PREEMPTION
+       select ARCH_INLINE_SPIN_UNLOCK if !PREEMPTION
+       select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPTION
+       select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION
+       select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION
        select ARCH_KEEP_MEMBLOCK
        select ARCH_USE_CMPXCHG_LOCKREF
        select ARCH_USE_QUEUED_RWLOCKS
index e273fac..999da59 100644 (file)
@@ -97,7 +97,7 @@ static int sha256_update_neon(struct shash_desc *desc, const u8 *data,
                 * input when running on a preemptible kernel, but process the
                 * data block by block instead.
                 */
-               if (IS_ENABLED(CONFIG_PREEMPT) &&
+               if (IS_ENABLED(CONFIG_PREEMPTION) &&
                    chunk + sctx->count % SHA256_BLOCK_SIZE > SHA256_BLOCK_SIZE)
                        chunk = SHA256_BLOCK_SIZE -
                                sctx->count % SHA256_BLOCK_SIZE;
index 524b3ea..aca337d 100644 (file)
@@ -675,8 +675,8 @@ USER(\label, ic     ivau, \tmp2)                    // invalidate I line PoU
  * where <label> is optional, and marks the point where execution will resume
  * after a yield has been performed. If omitted, execution resumes right after
  * the endif_yield_neon invocation. Note that the entire sequence, including
- * the provided patchup code, will be omitted from the image if CONFIG_PREEMPT
- * is not defined.
+ * the provided patchup code, will be omitted from the image if
+ * CONFIG_PREEMPTION is not defined.
  *
  * As a convenience, in the case where no patchup code is required, the above
  * sequence may be abbreviated to
@@ -704,7 +704,7 @@ USER(\label, ic     ivau, \tmp2)                    // invalidate I line PoU
        .endm
 
        .macro          if_will_cond_yield_neon
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        get_current_task        x0
        ldr             x0, [x0, #TSK_TI_PREEMPT]
        sub             x0, x0, #PREEMPT_DISABLE_OFFSET
index d499516..80e946b 100644 (file)
@@ -79,11 +79,11 @@ static inline bool should_resched(int preempt_offset)
        return pc == preempt_offset;
 }
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 void preempt_schedule(void);
 #define __preempt_schedule() preempt_schedule()
 void preempt_schedule_notrace(void);
 #define __preempt_schedule_notrace() preempt_schedule_notrace()
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
 #endif /* __ASM_PREEMPT_H */
index 1b6b7a8..9461d81 100644 (file)
@@ -602,7 +602,7 @@ el1_irq:
 
        irq_handler
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        ldr     x24, [tsk, #TSK_TI_PREEMPT]     // get preempt count
 alternative_if ARM64_HAS_IRQ_PRIO_MASKING
        /*
index 73caf35..cf402be 100644 (file)
@@ -144,9 +144,12 @@ void show_stack(struct task_struct *tsk, unsigned long *sp)
 
 #ifdef CONFIG_PREEMPT
 #define S_PREEMPT " PREEMPT"
+#elif defined(CONFIG_PREEMPT_RT)
+#define S_PREEMPT " PREEMPT_RT"
 #else
 #define S_PREEMPT ""
 #endif
+
 #define S_SMP " SMP"
 
 static int __die(const char *str, int err, struct pt_regs *regs)
index 4332a10..fb154d1 100644 (file)
@@ -18,7 +18,7 @@
 #define DP     B14
 #define SP     B15
 
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
 #define resume_kernel restore_all
 #endif
 
@@ -287,7 +287,7 @@ work_notifysig:
        ;; is a little bit different
        ;;
 ENTRY(ret_from_exception)
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        MASK_INT B2
 #endif
 
@@ -557,7 +557,7 @@ ENDPROC(_nmi_handler)
        ;;
        ;; Jump to schedule() then return to ret_from_isr
        ;;
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 resume_kernel:
        GET_THREAD_INFO A12
        LDW     .D1T1   *+A12(THREAD_INFO_PREEMPT_COUNT),A1
@@ -582,7 +582,7 @@ preempt_schedule:
        B       .S2     preempt_schedule_irq
 #endif
        ADDKPC  .S2     preempt_schedule,B3,4
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
 ENTRY(enable_exception)
        DINT
index a7a5b67..0077063 100644 (file)
@@ -277,7 +277,7 @@ ENTRY(csky_irq)
        zero_fp
        psrset  ee
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        mov     r9, sp                  /* Get current stack  pointer */
        bmaski  r10, THREAD_SHIFT
        andn    r9, r10                 /* Get thread_info */
@@ -294,7 +294,7 @@ ENTRY(csky_irq)
        mov     a0, sp
        jbsr    csky_do_IRQ
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        subi    r12, 1
        stw     r12, (r9, TINFO_PREEMPT)
        cmpnei  r12, 0
index 4ade5f8..c6e289b 100644 (file)
@@ -284,12 +284,12 @@ badsys:
        mov.l   er0,@(LER0:16,sp)
        bra     resume_userspace
 
-#if !defined(CONFIG_PREEMPT)
+#if !defined(CONFIG_PREEMPTION)
 #define resume_kernel restore_all
 #endif
 
 ret_from_exception:
-#if defined(CONFIG_PREEMPT)
+#if defined(CONFIG_PREEMPTION)
        orc     #0xc0,ccr
 #endif
 ret_from_interrupt:
@@ -319,7 +319,7 @@ work_resched:
 restore_all:
        RESTORE_ALL                     /* Does RTE */
 
-#if defined(CONFIG_PREEMPT)
+#if defined(CONFIG_PREEMPTION)
 resume_kernel:
        mov.l   @(TI_PRE_COUNT:16,er4),er0
        bne     restore_all:8
index 4023fdb..554371d 100644 (file)
@@ -265,12 +265,12 @@ event_dispatch:
         * should be in the designated register (usually R19)
         *
         * If we were in kernel mode, we don't need to check scheduler
-        * or signals if CONFIG_PREEMPT is not set.  If set, then it has
+        * or signals if CONFIG_PREEMPTION is not set.  If set, then it has
         * to jump to a need_resched kind of block.
-        * BTW, CONFIG_PREEMPT is not supported yet.
+        * BTW, CONFIG_PREEMPTION is not supported yet.
         */
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        R0 = #VM_INT_DISABLE
        trap1(#HVM_TRAP1_VMSETIE)
 #endif
index a9992be..2ac9263 100644 (file)
@@ -670,12 +670,12 @@ GLOBAL_ENTRY(ia64_leave_syscall)
         *
         * p6 controls whether current_thread_info()->flags needs to be check for
         * extra work.  We always check for extra work when returning to user-level.
-        * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
+        * With CONFIG_PREEMPTION, we also check for extra work when the preempt_count
         * is 0.  After extra work processing has been completed, execution
         * resumes at ia64_work_processed_syscall with p6 set to 1 if the extra-work-check
         * needs to be redone.
         */
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        RSM_PSR_I(p0, r2, r18)                  // disable interrupts
        cmp.eq pLvSys,p0=r0,r0                  // pLvSys=1: leave from syscall
 (pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
@@ -685,7 +685,7 @@ GLOBAL_ENTRY(ia64_leave_syscall)
 (pUStk)        mov r21=0                       // r21 <- 0
        ;;
        cmp.eq p6,p0=r21,r0             // p6 <- pUStk || (preempt_count == 0)
-#else /* !CONFIG_PREEMPT */
+#else /* !CONFIG_PREEMPTION */
        RSM_PSR_I(pUStk, r2, r18)
        cmp.eq pLvSys,p0=r0,r0          // pLvSys=1: leave from syscall
 (pUStk)        cmp.eq.unc p6,p0=r0,r0          // p6 <- pUStk
@@ -814,12 +814,12 @@ GLOBAL_ENTRY(ia64_leave_kernel)
         *
         * p6 controls whether current_thread_info()->flags needs to be check for
         * extra work.  We always check for extra work when returning to user-level.
-        * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
+        * With CONFIG_PREEMPTION, we also check for extra work when the preempt_count
         * is 0.  After extra work processing has been completed, execution
         * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
         * needs to be redone.
         */
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        RSM_PSR_I(p0, r17, r31)                 // disable interrupts
        cmp.eq p0,pLvSys=r0,r0                  // pLvSys=0: leave from kernel
 (pKStk)        adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
@@ -1120,7 +1120,7 @@ skip_rbs_switch:
 
        /*
         * On entry:
-        *      r20 = &current->thread_info->pre_count (if CONFIG_PREEMPT)
+        *      r20 = &current->thread_info->pre_count (if CONFIG_PREEMPTION)
         *      r31 = current->thread_info->flags
         * On exit:
         *      p6 = TRUE if work-pending-check needs to be redone
index b8356ed..a6d6a05 100644 (file)
@@ -841,7 +841,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
                return 1;
        }
 
-#if !defined(CONFIG_PREEMPT)
+#if !defined(CONFIG_PREEMPTION)
        if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) {
                /* Boost up -- we can execute copied instructions directly */
                ia64_psr(regs)->ri = p->ainsn.slot;
index de7083b..f6ded35 100644 (file)
@@ -728,7 +728,7 @@ no_intr_resched:
        bri     6f;
 /* MS: Return to kernel state. */
 2:
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        lwi     r11, CURRENT_TASK, TS_THREAD_INFO;
        /* MS: get preempt_count from thread info */
        lwi     r5, r11, TI_PREEMPT_COUNT;
index feb069c..655f40d 100644 (file)
@@ -63,7 +63,7 @@
        .endm
 
        .macro  local_irq_disable reg=t0
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        lw      \reg, TI_PRE_COUNT($28)
        addi    \reg, \reg, 1
        sw      \reg, TI_PRE_COUNT($28)
@@ -73,7 +73,7 @@
        xori    \reg, \reg, 1
        mtc0    \reg, CP0_STATUS
        irq_disable_hazard
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        lw      \reg, TI_PRE_COUNT($28)
        addi    \reg, \reg, -1
        sw      \reg, TI_PRE_COUNT($28)
index 5469d43..4849a48 100644 (file)
@@ -19,7 +19,7 @@
 #include <asm/thread_info.h>
 #include <asm/war.h>
 
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
 #define resume_kernel  restore_all
 #else
 #define __ret_from_irq ret_from_exception
@@ -27,7 +27,7 @@
 
        .text
        .align  5
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
 FEXPORT(ret_from_exception)
        local_irq_disable                       # preempt stop
        b       __ret_from_irq
@@ -53,7 +53,7 @@ resume_userspace:
        bnez    t0, work_pending
        j       restore_all
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 resume_kernel:
        local_irq_disable
        lw      t0, TI_PRE_COUNT($28)
index 12c06a8..e30298e 100644 (file)
@@ -62,7 +62,7 @@ config GENERIC_HWEIGHT
 
 config GENERIC_LOCKBREAK
        def_bool y
-       depends on PREEMPT
+       depends on PREEMPTION
 
 config TRACE_IRQFLAGS_SUPPORT
        def_bool y
index 1df02a7..6a2966c 100644 (file)
@@ -72,7 +72,7 @@
        restore_user_regs_last
        .endm
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        .macro  preempt_stop
        .endm
 #else
@@ -158,7 +158,7 @@ no_work_pending:
 /*
  * preemptive kernel
  */
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 resume_kernel:
        gie_disable
        lwi     $t0, [tsk+#TSK_TI_PREEMPT]
index 1e515cc..3d8d1d0 100644 (file)
@@ -365,7 +365,7 @@ ENTRY(ret_from_interrupt)
        ldw     r1, PT_ESTATUS(sp)      /* check if returning to kernel */
        TSTBNZ  r1, r1, ESTATUS_EU, Luser_return
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        GET_THREAD_INFO r1
        ldw     r4, TI_PREEMPT_COUNT(r1)
        bne     r4, r0, restore_all
index 2de83a0..71034b5 100644 (file)
@@ -82,7 +82,7 @@ config STACK_GROWSUP
 config GENERIC_LOCKBREAK
        bool
        default y
-       depends on SMP && PREEMPT
+       depends on SMP && PREEMPTION
 
 config ARCH_HAS_ILOG2_U32
        bool
index b96d744..9a03e29 100644 (file)
@@ -940,14 +940,14 @@ intr_restore:
        rfi
        nop
 
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
 # define intr_do_preempt       intr_restore
-#endif /* !CONFIG_PREEMPT */
+#endif /* !CONFIG_PREEMPTION */
 
        .import schedule,code
 intr_do_resched:
        /* Only call schedule on return to userspace. If we're returning
-        * to kernel space, we may schedule if CONFIG_PREEMPT, otherwise
+        * to kernel space, we may schedule if CONFIG_PREEMPTION, otherwise
         * we jump back to intr_restore.
         */
        LDREG   PT_IASQ0(%r16), %r20
@@ -979,7 +979,7 @@ intr_do_resched:
         * and preempt_count is 0. otherwise, we continue on
         * our merry way back to the current running task.
         */
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        .import preempt_schedule_irq,code
 intr_do_preempt:
        rsm     PSW_SM_I, %r0           /* disable interrupts */
@@ -999,7 +999,7 @@ intr_do_preempt:
        nop
 
        b,n     intr_restore            /* ssm PSW_SM_I done by intr_restore */
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
        /*
         * External interrupts.
index 25a5dca..e7c6070 100644 (file)
@@ -106,7 +106,7 @@ config LOCKDEP_SUPPORT
 config GENERIC_LOCKBREAK
        bool
        default y
-       depends on SMP && PREEMPT
+       depends on SMP && PREEMPTION
 
 config GENERIC_HWEIGHT
        bool
index d60908e..e1a4c39 100644 (file)
@@ -897,7 +897,7 @@ resume_kernel:
        bne-    0b
 1:
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        /* check current_thread_info->preempt_count */
        lwz     r0,TI_PREEMPT(r2)
        cmpwi   0,r0,0          /* if non-zero, just restore regs and return */
@@ -921,7 +921,7 @@ resume_kernel:
         */
        bl      trace_hardirqs_on
 #endif
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 restore_kuap:
        kuap_restore r1, r2, r9, r10, r0
 
index 3fd3ef3..a9a1d3c 100644 (file)
@@ -846,7 +846,7 @@ resume_kernel:
        bne-    0b
 1:
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        /* Check if we need to preempt */
        andi.   r0,r4,_TIF_NEED_RESCHED
        beq+    restore
@@ -877,7 +877,7 @@ resume_kernel:
        li      r10,MSR_RI
        mtmsrd  r10,1             /* Update machine state */
 #endif /* CONFIG_PPC_BOOK3E */
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
        .globl  fast_exc_return_irq
 fast_exc_return_irq:
index e163b7b..bad4d85 100644 (file)
@@ -155,7 +155,7 @@ _save_context:
        REG_L x2,  PT_SP(sp)
        .endm
 
-#if !IS_ENABLED(CONFIG_PREEMPT)
+#if !IS_ENABLED(CONFIG_PREEMPTION)
 .set resume_kernel, restore_all
 #endif
 
@@ -305,7 +305,7 @@ restore_all:
        sret
 #endif
 
-#if IS_ENABLED(CONFIG_PREEMPT)
+#if IS_ENABLED(CONFIG_PREEMPTION)
 resume_kernel:
        REG_L s0, TASK_TI_PREEMPT_COUNT(tp)
        bnez s0, restore_all
index 8cc2eea..287714d 100644 (file)
@@ -30,7 +30,7 @@ config GENERIC_BUG_RELATIVE_POINTERS
        def_bool y
 
 config GENERIC_LOCKBREAK
-       def_bool y if PREEMPT
+       def_bool y if PREEMPTION
 
 config PGSTE
        def_bool y if KVM
index b5ea9e1..6ede299 100644 (file)
@@ -130,11 +130,11 @@ static inline bool should_resched(int preempt_offset)
 
 #endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 extern asmlinkage void preempt_schedule(void);
 #define __preempt_schedule() preempt_schedule()
 extern asmlinkage void preempt_schedule_notrace(void);
 #define __preempt_schedule_notrace() preempt_schedule_notrace()
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
 #endif /* __ASM_PREEMPT_H */
index d306fe0..2c122d8 100644 (file)
@@ -195,6 +195,8 @@ void die(struct pt_regs *regs, const char *str)
               regs->int_code >> 17, ++die_counter);
 #ifdef CONFIG_PREEMPT
        pr_cont("PREEMPT ");
+#elif defined(CONFIG_PREEMPT_RT)
+       pr_cont("PREEMPT_RT ");
 #endif
        pr_cont("SMP ");
        if (debug_pagealloc_enabled())
index 270d1d1..9205add 100644 (file)
@@ -790,7 +790,7 @@ ENTRY(io_int_handler)
 .Lio_work:
        tm      __PT_PSW+1(%r11),0x01   # returning to user ?
        jo      .Lio_work_user          # yes -> do resched & signal
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        # check for preemptive scheduling
        icm     %r0,15,__LC_PREEMPT_COUNT
        jnz     .Lio_restore            # preemption is disabled
index f356ee6..9ece111 100644 (file)
@@ -108,7 +108,7 @@ config GENERIC_CALIBRATE_DELAY
 
 config GENERIC_LOCKBREAK
        def_bool y
-       depends on SMP && PREEMPT
+       depends on SMP && PREEMPTION
 
 config ARCH_SUSPEND_POSSIBLE
        def_bool n
index de68ffd..81c8b64 100644 (file)
@@ -86,7 +86,7 @@
        andi    r6, ~0xf0, r6;          \
        putcon  r6, SR;
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 #  define preempt_stop()       CLI()
 #else
 #  define preempt_stop()
@@ -884,7 +884,7 @@ ret_from_exception:
 
        /* Check softirqs */
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        pta   ret_from_syscall, tr0
        blink   tr0, ZERO
 
index d31f66e..956a7a0 100644 (file)
@@ -41,7 +41,7 @@
  */
 #include <asm/dwarf.h>
 
-#if defined(CONFIG_PREEMPT)
+#if defined(CONFIG_PREEMPTION)
 #  define preempt_stop()       cli ; TRACE_IRQS_OFF
 #else
 #  define preempt_stop()
@@ -84,7 +84,7 @@ ENTRY(ret_from_irq)
        get_current_thread_info r8, r0
        bt      resume_kernel   ! Yes, it's from kernel, go back soon
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        bra     resume_userspace
         nop
 ENTRY(resume_kernel)
index eb24cb1..e8c3ea0 100644 (file)
@@ -277,7 +277,7 @@ config US3_MC
 config GENERIC_LOCKBREAK
        bool
        default y
-       depends on SPARC64 && SMP && PREEMPT
+       depends on SPARC64 && SMP && PREEMPTION
 
 config NUMA
        bool "NUMA support"
index 29aa34f..c5fd4b4 100644 (file)
@@ -310,7 +310,7 @@ kern_rtt_restore:
                retry
 
 to_kernel:
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
                ldsw                    [%g6 + TI_PRE_COUNT], %l5
                brnz                    %l5, kern_fpucheck
                 ldx                    [%g6 + TI_FLAGS], %l5
index be89780..2c9e485 100644 (file)
@@ -520,7 +520,7 @@ common_exception_return:
        call4   schedule        # void schedule (void)
        j       1b
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 6:
        _bbci.l a4, TIF_NEED_RESCHED, 4f
 
index 87bd68d..0976e27 100644 (file)
@@ -519,12 +519,15 @@ DEFINE_SPINLOCK(die_lock);
 void die(const char * str, struct pt_regs * regs, long err)
 {
        static int die_counter;
+       const char *pr = "";
+
+       if (IS_ENABLED(CONFIG_PREEMPTION))
+               pr = IS_ENABLED(CONFIG_PREEMPT_RT) ? " PREEMPT_RT" : " PREEMPT";
 
        console_verbose();
        spin_lock_irq(&die_lock);
 
-       pr_info("%s: sig: %ld [#%d]%s\n", str, err, ++die_counter,
-               IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "");
+       pr_info("%s: sig: %ld [#%d]%s\n", str, err, ++die_counter, pr);
        show_regs(regs);
        if (!user_mode(regs))
                show_stack(NULL, (unsigned long*)regs->areg[1]);
index 8b9919c..70650b2 100644 (file)
@@ -8,7 +8,7 @@
 #include <linux/sched.h>
 #include <xen/xen-ops.h>
 
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
 
 /*
  * Some hypercalls issued by the toolstack can take many 10s of
@@ -37,4 +37,4 @@ asmlinkage __visible void xen_maybe_preempt_hcall(void)
                __this_cpu_write(xen_in_preemptible_hcall, true);
        }
 }
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
index fc1b564..0ee5386 100644 (file)
@@ -168,7 +168,7 @@ btrfs_device_set_##name(struct btrfs_device *dev, u64 size)         \
        write_seqcount_end(&dev->data_seqcount);                        \
        preempt_enable();                                               \
 }
-#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
 #define BTRFS_DEVICE_GETSET_FUNCS(name)                                        \
 static inline u64                                                      \
 btrfs_device_get_##name(const struct btrfs_device *dev)                        \
index 4ef2c05..c983092 100644 (file)
@@ -23,7 +23,7 @@ void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
 
        /*
         * But on 32-bit, we ought to make an effort to keep the two halves of
-        * i_blocks in sync despite SMP or PREEMPT - though stat's
+        * i_blocks in sync despite SMP or PREEMPTION - though stat's
         * generic_fillattr() doesn't bother, and we won't be applying quotas
         * (where i_blocks does become important) at the upper level.
         *
@@ -38,14 +38,14 @@ void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
                spin_unlock(&src->i_lock);
 
        /*
-        * If CONFIG_SMP or CONFIG_PREEMPT on 32-bit, it's vital for
+        * If CONFIG_SMP or CONFIG_PREEMPTION on 32-bit, it's vital for
         * fsstack_copy_inode_size() to hold some lock around
         * i_size_write(), otherwise i_size_read() may spin forever (see
         * include/linux/fs.h).  We don't necessarily hold i_mutex when this
         * is called, so take i_lock for that case.
         *
         * And if on 32-bit, continue our effort to keep the two halves of
-        * i_blocks in sync despite SMP or PREEMPT: use i_lock  for that case
+        * i_blocks in sync despite SMP or PREEMPTION: use i_lock for that case
         * too, and do both at once by combining the tests.
         *
         * There is none of this locking overhead in the 64-bit case.
index 98e0349..dddfcbb 100644 (file)
@@ -855,7 +855,7 @@ static inline loff_t i_size_read(const struct inode *inode)
                i_size = inode->i_size;
        } while (read_seqcount_retry(&inode->i_size_seqcount, seq));
        return i_size;
-#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
        loff_t i_size;
 
        preempt_disable();
@@ -880,7 +880,7 @@ static inline void i_size_write(struct inode *inode, loff_t i_size)
        inode->i_size = i_size;
        write_seqcount_end(&inode->i_size_seqcount);
        preempt_enable();
-#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
        preempt_disable();
        inode->i_size = i_size;
        preempt_enable();
index ea4c133..6fbe585 100644 (file)
@@ -730,7 +730,7 @@ static inline void hd_free_part(struct hd_struct *part)
  * accessor function.
  *
  * Code written along the lines of i_size_read() and i_size_write().
- * CONFIG_PREEMPT case optimizes the case of UP kernel with preemption
+ * CONFIG_PREEMPTION case optimizes the case of UP kernel with preemption
  * on.
  */
 static inline sector_t part_nr_sects_read(struct hd_struct *part)
@@ -743,7 +743,7 @@ static inline sector_t part_nr_sects_read(struct hd_struct *part)
                nr_sects = part->nr_sects;
        } while (read_seqcount_retry(&part->nr_sects_seq, seq));
        return nr_sects;
-#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
        sector_t nr_sects;
 
        preempt_disable();
@@ -766,7 +766,7 @@ static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
        write_seqcount_begin(&part->nr_sects_seq);
        part->nr_sects = size;
        write_seqcount_end(&part->nr_sects_seq);
-#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
        preempt_disable();
        part->nr_sects = size;
        preempt_enable();
index cc6bcc1..3ed5aa1 100644 (file)
@@ -9,7 +9,6 @@
  */
 
 #define SCHED_CPUFREQ_IOWAIT   (1U << 0)
-#define SCHED_CPUFREQ_MIGRATION        (1U << 1)
 
 #ifdef CONFIG_CPU_FREQ
 struct cpufreq_policy;
index f9a0c61..76d8b09 100644 (file)
@@ -32,8 +32,6 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg);
 int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg);
 bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
                         struct cpu_stop_work *work_buf);
-int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
-int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
 void stop_machine_park(int cpu);
 void stop_machine_unpark(int cpu);
 void stop_machine_yield(const struct cpumask *cpumask);
@@ -82,20 +80,6 @@ static inline bool stop_one_cpu_nowait(unsigned int cpu,
        return false;
 }
 
-static inline int stop_cpus(const struct cpumask *cpumask,
-                           cpu_stop_fn_t fn, void *arg)
-{
-       if (cpumask_test_cpu(raw_smp_processor_id(), cpumask))
-               return stop_one_cpu(raw_smp_processor_id(), fn, arg);
-       return -ENOENT;
-}
-
-static inline int try_stop_cpus(const struct cpumask *cpumask,
-                               cpu_stop_fn_t fn, void *arg)
-{
-       return stop_cpus(cpumask, fn, arg);
-}
-
 #endif /* CONFIG_SMP */
 
 /*
index d89969a..095be1d 100644 (file)
@@ -215,7 +215,7 @@ bool xen_running_on_version_or_later(unsigned int major, unsigned int minor);
 void xen_efi_runtime_setup(void);
 
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 
 static inline void xen_preemptible_hcall_begin(void)
 {
@@ -239,6 +239,6 @@ static inline void xen_preemptible_hcall_end(void)
        __this_cpu_write(xen_in_preemptible_hcall, false);
 }
 
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
 #endif /* INCLUDE_XEN_OPS_H */
index e0852dc..3de8fd1 100644 (file)
@@ -101,7 +101,7 @@ config UNINLINE_SPIN_UNLOCK
 # unlock and unlock_irq functions are inlined when:
 #   - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y
 #  or
-#   - DEBUG_SPINLOCK=n and PREEMPT=n
+#   - DEBUG_SPINLOCK=n and PREEMPTION=n
 #
 # unlock_bh and unlock_irqrestore functions are inlined when:
 #   - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y
@@ -139,7 +139,7 @@ config INLINE_SPIN_UNLOCK_BH
 
 config INLINE_SPIN_UNLOCK_IRQ
        def_bool y
-       depends on !PREEMPT || ARCH_INLINE_SPIN_UNLOCK_IRQ
+       depends on !PREEMPTION || ARCH_INLINE_SPIN_UNLOCK_IRQ
 
 config INLINE_SPIN_UNLOCK_IRQRESTORE
        def_bool y
@@ -168,7 +168,7 @@ config INLINE_READ_LOCK_IRQSAVE
 
 config INLINE_READ_UNLOCK
        def_bool y
-       depends on !PREEMPT || ARCH_INLINE_READ_UNLOCK
+       depends on !PREEMPTION || ARCH_INLINE_READ_UNLOCK
 
 config INLINE_READ_UNLOCK_BH
        def_bool y
@@ -176,7 +176,7 @@ config INLINE_READ_UNLOCK_BH
 
 config INLINE_READ_UNLOCK_IRQ
        def_bool y
-       depends on !PREEMPT || ARCH_INLINE_READ_UNLOCK_IRQ
+       depends on !PREEMPTION || ARCH_INLINE_READ_UNLOCK_IRQ
 
 config INLINE_READ_UNLOCK_IRQRESTORE
        def_bool y
@@ -205,7 +205,7 @@ config INLINE_WRITE_LOCK_IRQSAVE
 
 config INLINE_WRITE_UNLOCK
        def_bool y
-       depends on !PREEMPT || ARCH_INLINE_WRITE_UNLOCK
+       depends on !PREEMPTION || ARCH_INLINE_WRITE_UNLOCK
 
 config INLINE_WRITE_UNLOCK_BH
        def_bool y
@@ -213,7 +213,7 @@ config INLINE_WRITE_UNLOCK_BH
 
 config INLINE_WRITE_UNLOCK_IRQ
        def_bool y
-       depends on !PREEMPT || ARCH_INLINE_WRITE_UNLOCK_IRQ
+       depends on !PREEMPTION || ARCH_INLINE_WRITE_UNLOCK_IRQ
 
 config INLINE_WRITE_UNLOCK_IRQRESTORE
        def_bool y
index 4dc279e..9c706af 100644 (file)
@@ -525,8 +525,7 @@ static int bringup_wait_for_ap(unsigned int cpu)
        if (WARN_ON_ONCE((!cpu_online(cpu))))
                return -ECANCELED;
 
-       /* Unpark the stopper thread and the hotplug thread of the target cpu */
-       stop_machine_unpark(cpu);
+       /* Unpark the hotplug thread of the target cpu */
        kthread_unpark(st->thread);
 
        /*
@@ -1089,8 +1088,8 @@ void notify_cpu_starting(unsigned int cpu)
 
 /*
  * Called from the idle task. Wake up the controlling task which brings the
- * stopper and the hotplug thread of the upcoming CPU up and then delegates
- * the rest of the online bringup to the hotplug thread.
+ * hotplug thread of the upcoming CPU up and then delegates the rest of the
+ * online bringup to the hotplug thread.
  */
 void cpuhp_online_idle(enum cpuhp_state state)
 {
@@ -1100,6 +1099,12 @@ void cpuhp_online_idle(enum cpuhp_state state)
        if (state != CPUHP_AP_ONLINE_IDLE)
                return;
 
+       /*
+        * Unpark the stopper thread before we start the idle loop (and start
+        * scheduling); this ensures the stopper task is always available.
+        */
+       stop_machine_unpark(smp_processor_id());
+
        st->state = CPUHP_AP_ONLINE_IDLE;
        complete_ap_thread(st, true);
 }
index 1152259..12bca64 100644 (file)
@@ -370,7 +370,7 @@ u64 sched_clock_cpu(int cpu)
        if (sched_clock_stable())
                return sched_clock() + __sched_clock_offset;
 
-       if (!static_branch_unlikely(&sched_clock_running))
+       if (!static_branch_likely(&sched_clock_running))
                return sched_clock();
 
        preempt_disable_notrace();
@@ -393,7 +393,7 @@ void sched_clock_tick(void)
        if (sched_clock_stable())
                return;
 
-       if (!static_branch_unlikely(&sched_clock_running))
+       if (!static_branch_likely(&sched_clock_running))
                return;
 
        lockdep_assert_irqs_disabled();
@@ -460,7 +460,7 @@ void __init sched_clock_init(void)
 
 u64 sched_clock_cpu(int cpu)
 {
-       if (!static_branch_unlikely(&sched_clock_running))
+       if (!static_branch_likely(&sched_clock_running))
                return 0;
 
        return sched_clock();
index 90e4b00..fc1dfc0 100644 (file)
@@ -919,17 +919,17 @@ uclamp_eff_get(struct task_struct *p, enum uclamp_id clamp_id)
        return uc_req;
 }
 
-unsigned int uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id)
+unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id)
 {
        struct uclamp_se uc_eff;
 
        /* Task currently refcounted: use back-annotated (effective) value */
        if (p->uclamp[clamp_id].active)
-               return p->uclamp[clamp_id].value;
+               return (unsigned long)p->uclamp[clamp_id].value;
 
        uc_eff = uclamp_eff_get(p, clamp_id);
 
-       return uc_eff.value;
+       return (unsigned long)uc_eff.value;
 }
 
 /*
@@ -1253,7 +1253,8 @@ static void __init init_uclamp(void)
        mutex_init(&uclamp_mutex);
 
        for_each_possible_cpu(cpu) {
-               memset(&cpu_rq(cpu)->uclamp, 0, sizeof(struct uclamp_rq));
+               memset(&cpu_rq(cpu)->uclamp, 0,
+                               sizeof(struct uclamp_rq)*UCLAMP_CNT);
                cpu_rq(cpu)->uclamp_flags = 0;
        }
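
This is the "sched/core: Fix size of rq::uclamp initialization" change from the
shortlog: rq::uclamp is an array with one entry per clamp index, roughly (field
layout as declared in kernel/sched/sched.h):

    struct rq {
            /* ... */
            struct uclamp_rq        uclamp[UCLAMP_CNT];     /* UCLAMP_MIN and UCLAMP_MAX */
            /* ... */
    };

so clearing only sizeof(struct uclamp_rq) left the UCLAMP_MAX entry
uninitialized.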
 
@@ -4504,7 +4505,7 @@ static inline int rt_effective_prio(struct task_struct *p, int prio)
 void set_user_nice(struct task_struct *p, long nice)
 {
        bool queued, running;
-       int old_prio, delta;
+       int old_prio;
        struct rq_flags rf;
        struct rq *rq;
 
@@ -4538,19 +4539,18 @@ void set_user_nice(struct task_struct *p, long nice)
        set_load_weight(p, true);
        old_prio = p->prio;
        p->prio = effective_prio(p);
-       delta = p->prio - old_prio;
 
-       if (queued) {
+       if (queued)
                enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
-               /*
-                * If the task increased its priority or is running and
-                * lowered its priority, then reschedule its CPU:
-                */
-               if (delta < 0 || (delta > 0 && task_running(rq, p)))
-                       resched_curr(rq);
-       }
        if (running)
                set_next_task(rq, p);
+
+       /*
+        * If the task increased its priority or is running and
+        * lowered its priority, then reschedule its CPU:
+        */
+       p->sched_class->prio_changed(rq, p, old_prio);
+
 out_unlock:
        task_rq_unlock(rq, p, &rf);
 }
@@ -7100,6 +7100,12 @@ static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
 
        if (parent)
                sched_online_group(tg, parent);
+
+#ifdef CONFIG_UCLAMP_TASK_GROUP
+       /* Propagate the effective uclamp value for the new group */
+       cpu_util_update_eff(css);
+#endif
+
        return 0;
 }
 
index 9b8916f..7fbaee2 100644 (file)
@@ -238,7 +238,7 @@ unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
         */
        util = util_cfs + cpu_util_rt(rq);
        if (type == FREQUENCY_UTIL)
-               util = uclamp_util_with(rq, util, p);
+               util = uclamp_rq_util_with(rq, util, p);
 
        dl_util = cpu_util_dl(rq);
 
index b7abca9..1a2719e 100644 (file)
@@ -46,6 +46,8 @@ static int convert_prio(int prio)
  * @cp: The cpupri context
  * @p: The task
  * @lowest_mask: A mask to fill in with selected CPUs (or NULL)
+ * @fitness_fn: A pointer to a function to do custom checks whether the CPU
+ *              fits a specific criterion so that we only return those CPUs.
  *
  * Note: This function returns the recommended CPUs as calculated during the
  * current invocation.  By the time the call returns, the CPUs may have in
@@ -57,7 +59,8 @@ static int convert_prio(int prio)
  * Return: (int)bool - CPUs were found
  */
 int cpupri_find(struct cpupri *cp, struct task_struct *p,
-               struct cpumask *lowest_mask)
+               struct cpumask *lowest_mask,
+               bool (*fitness_fn)(struct task_struct *p, int cpu))
 {
        int idx = 0;
        int task_pri = convert_prio(p->prio);
@@ -98,6 +101,8 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
                        continue;
 
                if (lowest_mask) {
+                       int cpu;
+
                        cpumask_and(lowest_mask, p->cpus_ptr, vec->mask);
 
                        /*
@@ -108,7 +113,23 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
                         * condition, simply act as though we never hit this
                         * priority level and continue on.
                         */
-                       if (cpumask_any(lowest_mask) >= nr_cpu_ids)
+                       if (cpumask_empty(lowest_mask))
+                               continue;
+
+                       if (!fitness_fn)
+                               return 1;
+
+                       /* Ensure the capacity of the CPUs fit the task */
+                       for_each_cpu(cpu, lowest_mask) {
+                               if (!fitness_fn(p, cpu))
+                                       cpumask_clear_cpu(cpu, lowest_mask);
+                       }
+
+                       /*
+                        * If no CPU at the current priority can fit the task,
+                        * continue looking
+                        */
+                       if (cpumask_empty(lowest_mask))
                                continue;
                }
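
The new fitness_fn parameter lets the RT side (see "sched/rt: Make RT
capacity-aware" in the shortlog) filter out CPUs whose capacity cannot fit the
task, while passing NULL keeps the previous "any CPU at this priority level"
behaviour. A sketch of a caller; the callback name below is made up purely for
illustration:

    /* hypothetical fitness check: does @cpu have enough capacity for @p? */
    static bool fits_cpu_capacity(struct task_struct *p, int cpu)
    {
            /* e.g. compare p's clamped utilization against capacity_of(cpu) */
            return true;
    }

    if (!cpupri_find(cp, p, lowest_mask, fits_cpu_capacity))
            return -1;      /* no suitable CPU found at any priority level */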
 
index 7dc20a3..32dd520 100644 (file)
@@ -18,7 +18,9 @@ struct cpupri {
 };
 
 #ifdef CONFIG_SMP
-int  cpupri_find(struct cpupri *cp, struct task_struct *p, struct cpumask *lowest_mask);
+int  cpupri_find(struct cpupri *cp, struct task_struct *p,
+                struct cpumask *lowest_mask,
+                bool (*fitness_fn)(struct task_struct *p, int cpu));
 void cpupri_set(struct cpupri *cp, int cpu, int pri);
 int  cpupri_init(struct cpupri *cp);
 void cpupri_cleanup(struct cpupri *cp);
index d43318a..cff3e65 100644 (file)
@@ -355,7 +355,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
  * softirq as those do not count in task exec_runtime any more.
  */
 static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
-                                        struct rq *rq, int ticks)
+                                        int ticks)
 {
        u64 other, cputime = TICK_NSEC * ticks;
 
@@ -381,7 +381,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                account_system_index_time(p, cputime, CPUTIME_SOFTIRQ);
        } else if (user_tick) {
                account_user_time(p, cputime);
-       } else if (p == rq->idle) {
+       } else if (p == this_rq()->idle) {
                account_idle_time(cputime);
        } else if (p->flags & PF_VCPU) { /* System time or guest time */
                account_guest_time(p, cputime);
@@ -392,14 +392,12 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
 
 static void irqtime_account_idle_ticks(int ticks)
 {
-       struct rq *rq = this_rq();
-
-       irqtime_account_process_tick(current, 0, rq, ticks);
+       irqtime_account_process_tick(current, 0, ticks);
 }
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
 static inline void irqtime_account_idle_ticks(int ticks) { }
 static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
-                                               struct rq *rq, int nr_ticks) { }
+                                               int nr_ticks) { }
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 
 /*
@@ -473,13 +471,12 @@ void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
 void account_process_tick(struct task_struct *p, int user_tick)
 {
        u64 cputime, steal;
-       struct rq *rq = this_rq();
 
        if (vtime_accounting_enabled_this_cpu())
                return;
 
        if (sched_clock_irqtime) {
-               irqtime_account_process_tick(p, user_tick, rq, 1);
+               irqtime_account_process_tick(p, user_tick, 1);
                return;
        }
 
@@ -493,7 +490,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
 
        if (user_tick)
                account_user_time(p, cputime);
-       else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
+       else if ((p != this_rq()->idle) || (irq_count() != HARDIRQ_OFFSET))
                account_system_time(p, HARDIRQ_OFFSET, cputime);
        else
                account_idle_time(cputime);
index f7e4579..879d3cc 100644 (file)
@@ -751,9 +751,16 @@ void sysrq_sched_debug_show(void)
        int cpu;
 
        sched_debug_header(NULL);
-       for_each_online_cpu(cpu)
+       for_each_online_cpu(cpu) {
+               /*
+                * Need to reset softlockup watchdogs on all CPUs, because
+                * another CPU might be blocked waiting for us to process
+                * an IPI or stop_machine.
+                */
+               touch_nmi_watchdog();
+               touch_all_softlockup_watchdogs();
                print_cpu(NULL, cpu);
-
+       }
 }
 
 /*
index ba749f5..fe4e0d7 100644 (file)
@@ -801,7 +801,7 @@ void post_init_entity_util_avg(struct task_struct *p)
                 * For !fair tasks do:
                 *
                update_cfs_rq_load_avg(now, cfs_rq);
-               attach_entity_load_avg(cfs_rq, se, 0);
+               attach_entity_load_avg(cfs_rq, se);
                switched_from_fair(rq, p);
                 *
                 * such that the next switched_to_fair() has the
@@ -3114,7 +3114,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags)
 {
        struct rq *rq = rq_of(cfs_rq);
 
-       if (&rq->cfs == cfs_rq || (flags & SCHED_CPUFREQ_MIGRATION)) {
+       if (&rq->cfs == cfs_rq) {
                /*
                 * There are a few boundary cases this might miss but it should
                 * get called often enough that that should (hopefully) not be
@@ -3366,16 +3366,17 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf
 
        runnable_load_sum = (s64)se_runnable(se) * runnable_sum;
        runnable_load_avg = div_s64(runnable_load_sum, LOAD_AVG_MAX);
-       delta_sum = runnable_load_sum - se_weight(se) * se->avg.runnable_load_sum;
-       delta_avg = runnable_load_avg - se->avg.runnable_load_avg;
-
-       se->avg.runnable_load_sum = runnable_sum;
-       se->avg.runnable_load_avg = runnable_load_avg;
 
        if (se->on_rq) {
+               delta_sum = runnable_load_sum -
+                               se_weight(se) * se->avg.runnable_load_sum;
+               delta_avg = runnable_load_avg - se->avg.runnable_load_avg;
                add_positive(&cfs_rq->avg.runnable_load_avg, delta_avg);
                add_positive(&cfs_rq->avg.runnable_load_sum, delta_sum);
        }
+
+       se->avg.runnable_load_sum = runnable_sum;
+       se->avg.runnable_load_avg = runnable_load_avg;
 }
 
 static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum)
@@ -3520,7 +3521,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
  * Must call update_cfs_rq_load_avg() before this, since we rely on
  * cfs_rq->avg.last_update_time being current.
  */
-static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
        u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
 
@@ -3556,7 +3557,7 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 
        add_tg_cfs_propagate(cfs_rq, se->avg.load_sum);
 
-       cfs_rq_util_change(cfs_rq, flags);
+       cfs_rq_util_change(cfs_rq, 0);
 
        trace_pelt_cfs_tp(cfs_rq);
 }
@@ -3614,7 +3615,7 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
                 *
                 * IOW we're enqueueing a task on a new CPU.
                 */
-               attach_entity_load_avg(cfs_rq, se, SCHED_CPUFREQ_MIGRATION);
+               attach_entity_load_avg(cfs_rq, se);
                update_tg_load_avg(cfs_rq, 0);
 
        } else if (decayed) {
@@ -3711,6 +3712,20 @@ static inline unsigned long task_util_est(struct task_struct *p)
        return max(task_util(p), _task_util_est(p));
 }
 
+#ifdef CONFIG_UCLAMP_TASK
+static inline unsigned long uclamp_task_util(struct task_struct *p)
+{
+       return clamp(task_util_est(p),
+                    uclamp_eff_value(p, UCLAMP_MIN),
+                    uclamp_eff_value(p, UCLAMP_MAX));
+}
+#else
+static inline unsigned long uclamp_task_util(struct task_struct *p)
+{
+       return task_util_est(p);
+}
+#endif
+
 static inline void util_est_enqueue(struct cfs_rq *cfs_rq,
                                    struct task_struct *p)
 {
@@ -3822,7 +3837,7 @@ done:
 
 static inline int task_fits_capacity(struct task_struct *p, long capacity)
 {
-       return fits_capacity(task_util_est(p), capacity);
+       return fits_capacity(uclamp_task_util(p), capacity);
 }
 
 static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
@@ -3857,7 +3872,7 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 static inline void remove_entity_load_avg(struct sched_entity *se) {}
 
 static inline void
-attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) {}
+attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
 static inline void
 detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
 
@@ -5196,6 +5211,20 @@ static inline void update_overutilized_status(struct rq *rq)
 static inline void update_overutilized_status(struct rq *rq) { }
 #endif
 
+/* Runqueue only has SCHED_IDLE tasks enqueued */
+static int sched_idle_rq(struct rq *rq)
+{
+       return unlikely(rq->nr_running == rq->cfs.idle_h_nr_running &&
+                       rq->nr_running);
+}
+
+#ifdef CONFIG_SMP
+static int sched_idle_cpu(int cpu)
+{
+       return sched_idle_rq(cpu_rq(cpu));
+}
+#endif
+
 /*
  * The enqueue_task method is called before nr_running is
  * increased. Here we update the fair scheduling stats and
@@ -5310,6 +5339,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
        struct sched_entity *se = &p->se;
        int task_sleep = flags & DEQUEUE_SLEEP;
        int idle_h_nr_running = task_has_idle_policy(p);
+       bool was_sched_idle = sched_idle_rq(rq);
 
        for_each_sched_entity(se) {
                cfs_rq = cfs_rq_of(se);
@@ -5356,6 +5386,10 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
        if (!se)
                sub_nr_running(rq, 1);
 
+       /* balance early to pull high priority tasks */
+       if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
+               rq->next_balance = jiffies;
+
        util_est_dequeue(&rq->cfs, p, task_sleep);
        hrtick_update(rq);
 }
@@ -5378,15 +5412,6 @@ static struct {
 
 #endif /* CONFIG_NO_HZ_COMMON */
 
-/* CPU only has SCHED_IDLE tasks enqueued */
-static int sched_idle_cpu(int cpu)
-{
-       struct rq *rq = cpu_rq(cpu);
-
-       return unlikely(rq->nr_running == rq->cfs.idle_h_nr_running &&
-                       rq->nr_running);
-}
-
 static unsigned long cpu_load(struct rq *rq)
 {
        return cfs_rq_load_avg(&rq->cfs);
@@ -5588,7 +5613,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
        unsigned int min_exit_latency = UINT_MAX;
        u64 latest_idle_timestamp = 0;
        int least_loaded_cpu = this_cpu;
-       int shallowest_idle_cpu = -1, si_cpu = -1;
+       int shallowest_idle_cpu = -1;
        int i;
 
        /* Check if we have any choice: */
@@ -5597,6 +5622,9 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
 
        /* Traverse only the allowed CPUs */
        for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
+               if (sched_idle_cpu(i))
+                       return i;
+
                if (available_idle_cpu(i)) {
                        struct rq *rq = cpu_rq(i);
                        struct cpuidle_state *idle = idle_get_state(rq);
@@ -5619,12 +5647,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
                                latest_idle_timestamp = rq->idle_stamp;
                                shallowest_idle_cpu = i;
                        }
-               } else if (shallowest_idle_cpu == -1 && si_cpu == -1) {
-                       if (sched_idle_cpu(i)) {
-                               si_cpu = i;
-                               continue;
-                       }
-
+               } else if (shallowest_idle_cpu == -1) {
                        load = cpu_load(cpu_rq(i));
                        if (load < min_load) {
                                min_load = load;
@@ -5633,11 +5656,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
                }
        }
 
-       if (shallowest_idle_cpu != -1)
-               return shallowest_idle_cpu;
-       if (si_cpu != -1)
-               return si_cpu;
-       return least_loaded_cpu;
+       return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
 }
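
With si_cpu gone, the per-group pick follows a strict preference order: a CPU running only SCHED_IDLE tasks is taken immediately, then the idle CPU in the shallowest C-state, and only if neither exists the least-loaded busy CPU. A condensed sketch of that ordering over an array of per-CPU samples (the fields are illustrative and ignore the idle_stamp tie-break kept in the kernel code):

#include <limits.h>

struct cpu_sample {
	int sched_idle_only;		/* runs nothing but SCHED_IDLE tasks */
	int idle;			/* available and idle */
	unsigned int exit_latency;	/* C-state exit latency if idle */
	unsigned long load;		/* CFS load if busy */
};

static int pick_idlest(const struct cpu_sample *s, int nr_cpus, int fallback)
{
	unsigned int min_exit = UINT_MAX;
	unsigned long min_load = ULONG_MAX;
	int shallowest = -1, least_loaded = fallback;

	for (int i = 0; i < nr_cpus; i++) {
		if (s[i].sched_idle_only)
			return i;	/* best case, take it right away */

		if (s[i].idle) {
			if (s[i].exit_latency < min_exit) {
				min_exit = s[i].exit_latency;
				shallowest = i;
			}
		} else if (shallowest == -1 && s[i].load < min_load) {
			min_load = s[i].load;
			least_loaded = i;
		}
	}

	return shallowest != -1 ? shallowest : least_loaded;
}
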
 
 static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p,
@@ -5790,7 +5809,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
  */
 static int select_idle_smt(struct task_struct *p, int target)
 {
-       int cpu, si_cpu = -1;
+       int cpu;
 
        if (!static_branch_likely(&sched_smt_present))
                return -1;
@@ -5798,13 +5817,11 @@ static int select_idle_smt(struct task_struct *p, int target)
        for_each_cpu(cpu, cpu_smt_mask(target)) {
                if (!cpumask_test_cpu(cpu, p->cpus_ptr))
                        continue;
-               if (available_idle_cpu(cpu))
+               if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
                        return cpu;
-               if (si_cpu == -1 && sched_idle_cpu(cpu))
-                       si_cpu = cpu;
        }
 
-       return si_cpu;
+       return -1;
 }
 
 #else /* CONFIG_SCHED_SMT */
@@ -5828,12 +5845,13 @@ static inline int select_idle_smt(struct task_struct *p, int target)
  */
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
 {
+       struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
        struct sched_domain *this_sd;
        u64 avg_cost, avg_idle;
        u64 time, cost;
        s64 delta;
        int this = smp_processor_id();
-       int cpu, nr = INT_MAX, si_cpu = -1;
+       int cpu, nr = INT_MAX;
 
        this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
        if (!this_sd)
@@ -5859,15 +5877,13 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 
        time = cpu_clock(this);
 
-       for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
+       cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
+
+       for_each_cpu_wrap(cpu, cpus, target) {
                if (!--nr)
-                       return si_cpu;
-               if (!cpumask_test_cpu(cpu, p->cpus_ptr))
-                       continue;
-               if (available_idle_cpu(cpu))
+                       return -1;
+               if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
                        break;
-               if (si_cpu == -1 && sched_idle_cpu(cpu))
-                       si_cpu = cpu;
        }
 
        time = cpu_clock(this) - time;
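
Two things change in the scan above: the task's affinity is and-ed with the LLC span once, before the loop, instead of calling cpumask_test_cpu() per iteration, and a CPU that only runs SCHED_IDLE tasks now terminates the scan just like a genuinely idle one. A small sketch of the pre-masking idea, with a 64-bit bitmap standing in for struct cpumask:

#include <stdint.h>

/* Pre-compute "CPUs worth scanning" once instead of filtering inside the loop. */
static int scan_for_idle(uint64_t domain_span, uint64_t allowed,
			 const int *idle_or_sched_idle, int nr_budget)
{
	uint64_t candidates = domain_span & allowed;	/* one AND up front */

	for (int cpu = 0; cpu < 64; cpu++) {
		if (!(candidates & (UINT64_C(1) << cpu)))
			continue;
		if (!--nr_budget)	/* scan budget exhausted, give up */
			return -1;
		if (idle_or_sched_idle[cpu])
			return cpu;
	}

	return -1;
}
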
@@ -6268,9 +6284,18 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
                        if (!cpumask_test_cpu(cpu, p->cpus_ptr))
                                continue;
 
-                       /* Skip CPUs that will be overutilized. */
                        util = cpu_util_next(cpu, p, cpu);
                        cpu_cap = capacity_of(cpu);
+                       spare_cap = cpu_cap - util;
+
+                       /*
+                        * Skip CPUs that cannot satisfy the capacity request.
+                        * IOW, placing the task there would make the CPU
+                        * overutilized. Take uclamp into account to see how
+                        * much capacity we can get out of the CPU; this is
+                        * aligned with schedutil_cpu_util().
+                        */
+                       util = uclamp_rq_util_with(cpu_rq(cpu), util, p);
                        if (!fits_capacity(util, cpu_cap))
                                continue;
 
@@ -6285,7 +6310,6 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
                         * Find the CPU with the maximum spare capacity in
                         * the performance domain
                         */
-                       spare_cap = cpu_cap - util;
                        if (spare_cap > max_spare_cap) {
                                max_spare_cap = spare_cap;
                                max_spare_cap_cpu = cpu;
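
The ordering above matters: spare_cap is computed from the raw utilization, while the fits_capacity() filter now sees the uclamp-adjusted value. As a hedged numeric example on the usual 0..1024 capacity scale: a task with util = 100 but an effective uclamp_min of 512 is clamped up to 512, so a CPU of capacity 236 is skipped even though the raw utilization would fit, while a CPU of capacity 1024 passes and still reports spare_cap = 1024 - 100 = 924. A tiny sketch of that filter (the ~20% headroom factor is an assumption about fits_capacity(), not a quote of it):

/* Does the clamped utilization still leave headroom on this CPU? */
static int toy_fits_capacity(unsigned long util, unsigned long cap)
{
	return util * 1280 < cap * 1024;	/* assumed ~20% margin */
}

static unsigned long toy_clamp_util(unsigned long util,
				    unsigned long uc_min, unsigned long uc_max)
{
	if (util < uc_min)
		util = uc_min;	/* boost up to the requested floor */
	if (util > uc_max)
		util = uc_max;	/* then cap at the ceiling */
	return util;
}

/*
 * toy_clamp_util(100, 512, 1024) == 512:
 *   fits on cap 1024 (512 * 1280 < 1024 * 1024), not on cap 236,
 *   while spare capacity is still judged on the raw 100.
 */
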
@@ -7780,29 +7804,11 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
                 */
 
                for_each_cpu(cpu, sched_group_span(sdg)) {
-                       struct sched_group_capacity *sgc;
-                       struct rq *rq = cpu_rq(cpu);
+                       unsigned long cpu_cap = capacity_of(cpu);
 
-                       /*
-                        * build_sched_domains() -> init_sched_groups_capacity()
-                        * gets here before we've attached the domains to the
-                        * runqueues.
-                        *
-                        * Use capacity_of(), which is set irrespective of domains
-                        * in update_cpu_capacity().
-                        *
-                        * This avoids capacity from being 0 and
-                        * causing divide-by-zero issues on boot.
-                        */
-                       if (unlikely(!rq->sd)) {
-                               capacity += capacity_of(cpu);
-                       } else {
-                               sgc = rq->sd->groups->sgc;
-                               capacity += sgc->capacity;
-                       }
-
-                       min_capacity = min(capacity, min_capacity);
-                       max_capacity = max(capacity, max_capacity);
+                       capacity += cpu_cap;
+                       min_capacity = min(cpu_cap, min_capacity);
+                       max_capacity = max(cpu_cap, max_capacity);
                }
        } else  {
                /*
@@ -8168,14 +8174,18 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 
        case group_has_spare:
                /*
-                * Select not overloaded group with lowest number of
-                * idle cpus. We could also compare the spare capacity
-                * which is more stable but it can end up that the
-                * group has less spare capacity but finally more idle
+                * Select not overloaded group with lowest number of idle cpus
+                * and highest number of running tasks. We could also compare
+                * the spare capacity which is more stable but it can end up
+                * that the group has less spare capacity but finally more idle
                 * CPUs which means less opportunity to pull tasks.
                 */
-               if (sgs->idle_cpus >= busiest->idle_cpus)
+               if (sgs->idle_cpus > busiest->idle_cpus)
+                       return false;
+               else if ((sgs->idle_cpus == busiest->idle_cpus) &&
+                        (sgs->sum_nr_running <= busiest->sum_nr_running))
                        return false;
+
                break;
        }
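
Among groups that still have spare capacity, the busiest-group pick now uses two keys: fewest idle CPUs first and, on a tie, more running tasks, which makes groups packed with SCHED_IDLE work attractive pull sources. A compact sketch of that two-key comparison (field names illustrative):

struct group_stats {
	unsigned int idle_cpus;
	unsigned int sum_nr_running;
};

/* Return 1 if @candidate should replace @busiest as the group to pull from. */
static int picks_busier(const struct group_stats *candidate,
			const struct group_stats *busiest)
{
	if (candidate->idle_cpus > busiest->idle_cpus)
		return 0;	/* more idle CPUs: less interesting */
	if (candidate->idle_cpus == busiest->idle_cpus &&
	    candidate->sum_nr_running <= busiest->sum_nr_running)
		return 0;	/* tie on idle CPUs but not more tasks */
	return 1;
}
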
 
@@ -9529,6 +9539,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
 {
        int continue_balancing = 1;
        int cpu = rq->cpu;
+       int busy = idle != CPU_IDLE && !sched_idle_cpu(cpu);
        unsigned long interval;
        struct sched_domain *sd;
        /* Earliest time when we have to do rebalance again */
@@ -9565,7 +9576,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
                        break;
                }
 
-               interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
+               interval = get_sd_balance_interval(sd, busy);
 
                need_serialize = sd->flags & SD_SERIALIZE;
                if (need_serialize) {
@@ -9581,9 +9592,10 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
                                 * state even if we migrated tasks. Update it.
                                 */
                                idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
+                               busy = idle != CPU_IDLE && !sched_idle_cpu(cpu);
                        }
                        sd->last_balance = jiffies;
-                       interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
+                       interval = get_sd_balance_interval(sd, busy);
                }
                if (need_serialize)
                        spin_unlock(&balancing);
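
The busy flag above treats a CPU whose runqueue holds only SCHED_IDLE tasks like an idle CPU when choosing how often to rebalance, and it is recomputed after a balance pass may have changed the CPU's state. A hedged sketch of how such a flag typically stretches the interval (scaling by a busy factor is an assumption about get_sd_balance_interval(), not a copy of it):

/* Assumed shape of the interval selection: busy CPUs rebalance less often. */
static unsigned long toy_balance_interval(unsigned long base_interval,
					  unsigned int busy_factor, int cpu_busy)
{
	unsigned long interval = base_interval;

	if (cpu_busy)
		interval *= busy_factor;	/* back off while there is real work to run */

	if (interval < 1)
		interval = 1;

	return interval;
}
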
@@ -10333,6 +10345,9 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
        if (!task_on_rq_queued(p))
                return;
 
+       if (rq->cfs.nr_running == 1)
+               return;
+
        /*
         * Reschedule if we are currently running on this runqueue and
         * our priority decreased, or if we are not currently running on
@@ -10423,7 +10438,7 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
 
        /* Synchronize entity with its cfs_rq */
        update_load_avg(cfs_rq, se, sched_feat(ATTACH_AGE_LOAD) ? 0 : SKIP_AGE_LOAD);
-       attach_entity_load_avg(cfs_rq, se, 0);
+       attach_entity_load_avg(cfs_rq, se);
        update_tg_load_avg(cfs_rq, false);
        propagate_entity_cfs_rq(se);
 }
index ffa959e..b743bf3 100644 (file)
@@ -158,7 +158,7 @@ static void cpuidle_idle_call(void)
        /*
         * Suspend-to-idle ("s2idle") is a system state in which all user space
         * has been frozen, all I/O devices have been suspended and the only
-        * activity happens here and in iterrupts (if any).  In that case bypass
+        * activity happens here and in interrupts (if any). In that case bypass
         * the cpuidle governor and go straight for the deepest idle state
         * available.  Possibly also suspend the local tick and the entire
         * timekeeping to prevent timer interrupts from kicking us out of idle
index a96db50..bd006b7 100644 (file)
@@ -129,8 +129,20 @@ accumulate_sum(u64 delta, struct sched_avg *sa,
                 * Step 2
                 */
                delta %= 1024;
-               contrib = __accumulate_pelt_segments(periods,
-                               1024 - sa->period_contrib, delta);
+               if (load) {
+                       /*
+                        * This relies on the:
+                        *
+                        * if (!load)
+                        *      runnable = running = 0;
+                        *
+                        * clause from ___update_load_sum(); this results in
+                        * the below usage of @contrib to disappear entirely,
+                        * so no point in calculating it.
+                        */
+                       contrib = __accumulate_pelt_segments(periods,
+                                       1024 - sa->period_contrib, delta);
+               }
        }
        sa->period_contrib = delta;
 
@@ -205,7 +217,9 @@ ___update_load_sum(u64 now, struct sched_avg *sa,
         * This means that weight will be 0 but not running for a sched_entity
         * but also for a cfs_rq if the latter becomes idle. As an example,
         * this happens during idle_balance() which calls
-        * update_blocked_averages()
+        * update_blocked_averages().
+        *
+        * Also see the comment in accumulate_sum().
         */
        if (!load)
                runnable = running = 0;
index ce8f674..db7b50b 100644 (file)
@@ -1280,10 +1280,12 @@ static const struct file_operations psi_cpu_fops = {
 
 static int __init psi_proc_init(void)
 {
-       proc_mkdir("pressure", NULL);
-       proc_create("pressure/io", 0, NULL, &psi_io_fops);
-       proc_create("pressure/memory", 0, NULL, &psi_memory_fops);
-       proc_create("pressure/cpu", 0, NULL, &psi_cpu_fops);
+       if (psi_enable) {
+               proc_mkdir("pressure", NULL);
+               proc_create("pressure/io", 0, NULL, &psi_io_fops);
+               proc_create("pressure/memory", 0, NULL, &psi_memory_fops);
+               proc_create("pressure/cpu", 0, NULL, &psi_cpu_fops);
+       }
        return 0;
 }
 module_init(psi_proc_init);
index e591d40..4043abe 100644 (file)
@@ -437,6 +437,45 @@ static inline int on_rt_rq(struct sched_rt_entity *rt_se)
        return rt_se->on_rq;
 }
 
+#ifdef CONFIG_UCLAMP_TASK
+/*
+ * Verify the fitness of task @p to run on @cpu taking into account the uclamp
+ * settings.
+ *
+ * This check is only important for heterogeneous systems where the uclamp_min
+ * value is higher than the capacity of @cpu. For non-heterogeneous systems this
+ * function will always return true.
+ *
+ * The function will return true if the capacity of the @cpu is >= the
+ * uclamp_min and false otherwise.
+ *
+ * Note that uclamp_min will be clamped to uclamp_max if uclamp_min
+ * > uclamp_max.
+ */
+static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
+{
+       unsigned int min_cap;
+       unsigned int max_cap;
+       unsigned int cpu_cap;
+
+       /* Only heterogeneous systems can benefit from this check */
+       if (!static_branch_unlikely(&sched_asym_cpucapacity))
+               return true;
+
+       min_cap = uclamp_eff_value(p, UCLAMP_MIN);
+       max_cap = uclamp_eff_value(p, UCLAMP_MAX);
+
+       cpu_cap = capacity_orig_of(cpu);
+
+       return cpu_cap >= min(min_cap, max_cap);
+}
+#else
+static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
+{
+       return true;
+}
+#endif
+
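
A quick numeric reading of the helper: with uclamp_min = 300 and uclamp_max = 200 the effective requirement is min(300, 200) = 200, so a CPU with capacity_orig = 236 fits even though the raw uclamp_min alone would not; on a symmetric system the static branch keeps the whole check a constant true. A self-contained sketch with those numbers:

/* Illustrative stand-in for the RT capacity check on an asymmetric system. */
static int toy_rt_fits(unsigned int uclamp_min, unsigned int uclamp_max,
		       unsigned int cpu_cap, int asym_cpucapacity)
{
	unsigned int need;

	if (!asym_cpucapacity)
		return 1;	/* symmetric capacities: always fits */

	need = uclamp_min < uclamp_max ? uclamp_min : uclamp_max;

	return cpu_cap >= need;
}

/* toy_rt_fits(300, 200, 236, 1) == 1;  toy_rt_fits(300, 1024, 236, 1) == 0 */
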
 #ifdef CONFIG_RT_GROUP_SCHED
 
 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
@@ -1391,6 +1430,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
 {
        struct task_struct *curr;
        struct rq *rq;
+       bool test;
 
        /* For anything but wake ups, just return the task_cpu */
        if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
@@ -1422,10 +1462,16 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
         *
         * This test is optimistic, if we get it wrong the load-balancer
         * will have to sort it out.
+        *
+        * We take into account the capacity of the CPU to ensure it fits the
+        * requirement of the task - which is only important on heterogeneous
+        * systems like big.LITTLE.
         */
-       if (curr && unlikely(rt_task(curr)) &&
-           (curr->nr_cpus_allowed < 2 ||
-            curr->prio <= p->prio)) {
+       test = curr &&
+              unlikely(rt_task(curr)) &&
+              (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio);
+
+       if (test || !rt_task_fits_capacity(p, cpu)) {
                int target = find_lowest_rq(p);
 
                /*
@@ -1449,15 +1495,15 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
         * let's hope p can move out.
         */
        if (rq->curr->nr_cpus_allowed == 1 ||
-           !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
+           !cpupri_find(&rq->rd->cpupri, rq->curr, NULL, NULL))
                return;
 
        /*
         * p is migratable, so let's not schedule it and
         * see if it is pushed or pulled somewhere else.
         */
-       if (p->nr_cpus_allowed != 1
-           && cpupri_find(&rq->rd->cpupri, p, NULL))
+       if (p->nr_cpus_allowed != 1 &&
+           cpupri_find(&rq->rd->cpupri, p, NULL, NULL))
                return;
 
        /*
@@ -1601,7 +1647,8 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
        if (!task_running(rq, p) &&
-           cpumask_test_cpu(cpu, p->cpus_ptr))
+           cpumask_test_cpu(cpu, p->cpus_ptr) &&
+           rt_task_fits_capacity(p, cpu))
                return 1;
 
        return 0;
@@ -1643,7 +1690,8 @@ static int find_lowest_rq(struct task_struct *task)
        if (task->nr_cpus_allowed == 1)
                return -1; /* No other targets possible */
 
-       if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
+       if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask,
+                        rt_task_fits_capacity))
                return -1; /* No targets found */
 
        /*
@@ -2147,12 +2195,14 @@ skip:
  */
 static void task_woken_rt(struct rq *rq, struct task_struct *p)
 {
-       if (!task_running(rq, p) &&
-           !test_tsk_need_resched(rq->curr) &&
-           p->nr_cpus_allowed > 1 &&
-           (dl_task(rq->curr) || rt_task(rq->curr)) &&
-           (rq->curr->nr_cpus_allowed < 2 ||
-            rq->curr->prio <= p->prio))
+       bool need_to_push = !task_running(rq, p) &&
+                           !test_tsk_need_resched(rq->curr) &&
+                           p->nr_cpus_allowed > 1 &&
+                           (dl_task(rq->curr) || rt_task(rq->curr)) &&
+                           (rq->curr->nr_cpus_allowed < 2 ||
+                            rq->curr->prio <= p->prio);
+
+       if (need_to_push || !rt_task_fits_capacity(p, cpu_of(rq)))
                push_rt_tasks(rq);
 }
 
@@ -2224,7 +2274,10 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
         */
        if (task_on_rq_queued(p) && rq->curr != p) {
 #ifdef CONFIG_SMP
-               if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
+               bool need_to_push = rq->rt.overloaded ||
+                                   !rt_task_fits_capacity(p, cpu_of(rq));
+
+               if (p->nr_cpus_allowed > 1 && need_to_push)
                        rt_queue_push_tasks(rq);
 #endif /* CONFIG_SMP */
                if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq)))
index 280a3c7..1a88dc8 100644 (file)
@@ -2300,14 +2300,14 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
 #endif /* CONFIG_CPU_FREQ */
 
 #ifdef CONFIG_UCLAMP_TASK
-unsigned int uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
+unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
 
 static __always_inline
-unsigned int uclamp_util_with(struct rq *rq, unsigned int util,
-                             struct task_struct *p)
+unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
+                                 struct task_struct *p)
 {
-       unsigned int min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
-       unsigned int max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
+       unsigned long min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
+       unsigned long max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
 
        if (p) {
                min_util = max(min_util, uclamp_eff_value(p, UCLAMP_MIN));
@@ -2324,18 +2324,10 @@ unsigned int uclamp_util_with(struct rq *rq, unsigned int util,
 
        return clamp(util, min_util, max_util);
 }
-
-static inline unsigned int uclamp_util(struct rq *rq, unsigned int util)
-{
-       return uclamp_util_with(rq, util, NULL);
-}
 #else /* CONFIG_UCLAMP_TASK */
-static inline unsigned int uclamp_util_with(struct rq *rq, unsigned int util,
-                                           struct task_struct *p)
-{
-       return util;
-}
-static inline unsigned int uclamp_util(struct rq *rq, unsigned int util)
+static inline
+unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
+                                 struct task_struct *p)
 {
        return util;
 }
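
After the rename, the p == NULL case takes over the job of the removed uclamp_util(): callers that only care about the runqueue-wide clamps pass NULL, while wakeup-path callers pass the task so its own effective clamps can raise the floor and the ceiling. A plain C model of the clamp itself, with the rq and task lookups elided:

/* min/max come from the runqueue aggregate; an optional task may only raise them. */
static unsigned long toy_uclamp_rq_util_with(unsigned long util,
					     unsigned long rq_min, unsigned long rq_max,
					     const unsigned long *p_min,
					     const unsigned long *p_max)
{
	unsigned long lo = rq_min, hi = rq_max;

	if (p_min && *p_min > lo)
		lo = *p_min;
	if (p_max && *p_max > hi)
		hi = *p_max;

	if (util < lo)
		util = lo;
	if (util > hi)
		util = hi;	/* if lo > hi, the ceiling wins, as with clamp() */

	return util;
}
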
index 6ec1e59..dfb64c0 100644 (file)
@@ -1879,6 +1879,42 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve
        return sd;
 }
 
+/*
+ * Ensure topology masks are sane, i.e. there are no conflicts (overlaps) for
+ * any two given CPUs at this (non-NUMA) topology level.
+ */
+static bool topology_span_sane(struct sched_domain_topology_level *tl,
+                             const struct cpumask *cpu_map, int cpu)
+{
+       int i;
+
+       /* NUMA levels are allowed to overlap */
+       if (tl->flags & SDTL_OVERLAP)
+               return true;
+
+       /*
+        * Non-NUMA levels cannot partially overlap - they must be either
+        * completely equal or completely disjoint. Otherwise we can end up
+        * breaking the sched_group lists - i.e. a later get_group() pass
+        * breaks the linking done for an earlier span.
+        */
+       for_each_cpu(i, cpu_map) {
+               if (i == cpu)
+                       continue;
+               /*
+                * We should 'and' all those masks with 'cpu_map' to exactly
+                * match the topology we're about to build, but that can only
+                * remove CPUs, which only lessens our ability to detect
+                * overlaps
+                */
+               if (!cpumask_equal(tl->mask(cpu), tl->mask(i)) &&
+                   cpumask_intersects(tl->mask(cpu), tl->mask(i)))
+                       return false;
+       }
+
+       return true;
+}
+
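
Concretely, the new check rejects a topology where, say, CPU0 reports a sibling mask of {0,1} while CPU1 reports {1,2}: the masks intersect without being equal, which is exactly the partial overlap that would corrupt the sched_group lists later. A bitmap sketch of the pairwise test:

#include <stdint.h>

/* Pairwise sanity check: per-CPU masks must be identical or disjoint. */
static int toy_span_sane(const uint64_t *tl_mask, int nr_cpus)
{
	for (int a = 0; a < nr_cpus; a++) {
		for (int b = a + 1; b < nr_cpus; b++) {
			if (tl_mask[a] != tl_mask[b] &&
			    (tl_mask[a] & tl_mask[b]))
				return 0;	/* partial overlap: broken topology */
		}
	}

	return 1;
}

/* { 0x3, 0x3, 0xc, 0xc } passes;  { 0x3, 0x6, ... } does not. */
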
 /*
  * Find the sched_domain_topology_level where all CPU capacities are visible
  * for all CPUs.
@@ -1975,6 +2011,9 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
                                has_asym = true;
                        }
 
+                       if (WARN_ON(!topology_span_sane(tl, cpu_map, i)))
+                               goto error;
+
                        sd = build_sched_domain(tl, cpu_map, attr, sd, dflags, i);
 
                        if (tl == sched_domain_topology)
index 45eba18..02ce292 100644 (file)
@@ -179,6 +179,7 @@ void init_wait_var_entry(struct wait_bit_queue_entry *wbq_entry, void *var, int
                        .bit_nr = -1,
                },
                .wq_entry = {
+                       .flags   = flags,
                        .private = current,
                        .func    = var_wake_function,
                        .entry   = LIST_HEAD_INIT(wbq_entry->wq_entry.entry),
index 1fe34a9..865bb02 100644 (file)
@@ -442,7 +442,7 @@ static int __stop_cpus(const struct cpumask *cpumask,
  * @cpumask were offline; otherwise, 0 if all executions of @fn
  * returned 0, any non zero return value if any returned non zero.
  */
-int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
+static int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
 {
        int ret;
 
@@ -453,36 +453,6 @@ int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
        return ret;
 }
 
-/**
- * try_stop_cpus - try to stop multiple cpus
- * @cpumask: cpus to stop
- * @fn: function to execute
- * @arg: argument to @fn
- *
- * Identical to stop_cpus() except that it fails with -EAGAIN if
- * someone else is already using the facility.
- *
- * CONTEXT:
- * Might sleep.
- *
- * RETURNS:
- * -EAGAIN if someone else is already stopping cpus, -ENOENT if
- * @fn(@arg) was not executed at all because all cpus in @cpumask were
- * offline; otherwise, 0 if all executions of @fn returned 0, any non
- * zero return value if any returned non zero.
- */
-int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
-{
-       int ret;
-
-       /* static works are used, process one request at a time */
-       if (!mutex_trylock(&stop_cpus_mutex))
-               return -EAGAIN;
-       ret = __stop_cpus(cpumask, fn, arg);
-       mutex_unlock(&stop_cpus_mutex);
-       return ret;
-}
-
 static int cpu_stop_should_run(unsigned int cpu)
 {
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
index 4bdfa27..301db44 100644 (file)
@@ -2280,7 +2280,7 @@ __acquires(&pool->lock)
        }
 
        /*
-        * The following prevents a kworker from hogging CPU on !PREEMPT
+        * The following prevents a kworker from hogging CPU on !PREEMPTION
         * kernels, where a requeueing work item waiting for something to
         * happen could deadlock with stop_machine as such work item could
         * indefinitely requeue itself while all other CPUs are trapped in
index 5ffe144..6859f52 100644 (file)
@@ -1025,7 +1025,7 @@ config DEBUG_TIMEKEEPING
 
 config DEBUG_PREEMPT
        bool "Debug preemptible kernel"
-       depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT
+       depends on DEBUG_KERNEL && PREEMPTION && TRACE_IRQFLAGS_SUPPORT
        default y
        help
          If you say Y here then the kernel will use a debug variant of the
index 45442d9..1c4be87 100644 (file)
@@ -2203,7 +2203,7 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
                                pte_t *page_table, pte_t orig_pte)
 {
        int same = 1;
-#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPTION)
        if (sizeof(pte_t) > sizeof(unsigned long)) {
                spinlock_t *ptl = pte_lockptr(mm, pmd);
                spin_lock(ptl);
index 2e1a577..0ab92ec 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1964,7 +1964,7 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
        return get_any_partial(s, flags, c);
 }
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 /*
  * Calculate the next globally unique transaction for disambiguation
  * during cmpxchg. The transactions start with the cpu number and are then
@@ -2009,7 +2009,7 @@ static inline void note_cmpxchg_failure(const char *n,
 
        pr_info("%s %s: cmpxchg redo ", n, s->name);
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        if (tid_to_cpu(tid) != tid_to_cpu(actual_tid))
                pr_warn("due to cpu change %d -> %d\n",
                        tid_to_cpu(tid), tid_to_cpu(actual_tid));
@@ -2637,7 +2637,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
        unsigned long flags;
 
        local_irq_save(flags);
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
        /*
         * We may have been preempted and rescheduled on a different
         * cpu before disabling interrupts. Need to reload cpu area
@@ -2691,13 +2691,13 @@ redo:
         * as we end up on the original cpu again when doing the cmpxchg.
         *
         * We should guarantee that tid and kmem_cache are retrieved on
-        * the same cpu. It could be different if CONFIG_PREEMPT so we need
+        * the same cpu. It could be different if CONFIG_PREEMPTION so we need
         * to check if it is matched or not.
         */
        do {
                tid = this_cpu_read(s->cpu_slab->tid);
                c = raw_cpu_ptr(s->cpu_slab);
-       } while (IS_ENABLED(CONFIG_PREEMPT) &&
+       } while (IS_ENABLED(CONFIG_PREEMPTION) &&
                 unlikely(tid != READ_ONCE(c->tid)));
 
        /*
@@ -2971,7 +2971,7 @@ redo:
        do {
                tid = this_cpu_read(s->cpu_slab->tid);
                c = raw_cpu_ptr(s->cpu_slab);
-       } while (IS_ENABLED(CONFIG_PREEMPT) &&
+       } while (IS_ENABLED(CONFIG_PREEMPTION) &&
                 unlikely(tid != READ_ONCE(c->tid)));
 
        /* Same with comment on barrier() in slab_alloc_node() */
index 81befd0..3d3ea1c 100644 (file)
@@ -928,7 +928,7 @@ EXPORT_SYMBOL(dev_get_by_napi_id);
  *
  *     The use of raw_seqcount_begin() and cond_resched() before
  *     retrying is required as we want to give the writers a chance
- *     to complete when CONFIG_PREEMPT is not set.
+ *     to complete when CONFIG_PREEMPTION is not set.
  */
 int netdev_get_name(struct net *net, char *name, int ifindex)
 {