X-Git-Url: http://git.monstr.eu/?a=blobdiff_plain;ds=sidebyside;f=kernel%2Fsched%2Fsched.h;h=590e6f27068cba844143faf6d31b97d6891f05c3;hb=3aef1551e942860a3881087171ef0cd45f6ebda7;hp=28709f6b0975c8bf5a36266b41ec356280adde29;hpb=ee4a925107d1f4d10ee0a935841c2491f1c06ec0;p=linux-2.6-microblaze.git diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 28709f6b0975..590e6f27068c 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -67,7 +67,6 @@ #include #include -#include #ifdef CONFIG_PARAVIRT # include @@ -257,30 +256,6 @@ struct rt_bandwidth { void __dl_clear_params(struct task_struct *p); -/* - * To keep the bandwidth of -deadline tasks and groups under control - * we need some place where: - * - store the maximum -deadline bandwidth of the system (the group); - * - cache the fraction of that bandwidth that is currently allocated. - * - * This is all done in the data structure below. It is similar to the - * one used for RT-throttling (rt_bandwidth), with the main difference - * that, since here we are only interested in admission control, we - * do not decrease any runtime while the group "executes", neither we - * need a timer to replenish it. - * - * With respect to SMP, the bandwidth is given on a per-CPU basis, - * meaning that: - * - dl_bw (< 100%) is the bandwidth of the system (group) on each CPU; - * - dl_total_bw array contains, in the i-eth element, the currently - * allocated bandwidth on the i-eth CPU. - * Moreover, groups consume bandwidth on each CPU, while tasks only - * consume bandwidth on the CPU they're running on. - * Finally, dl_total_bw_cpu is used to cache the index of dl_total_bw - * that will be shown the next time the proc or cgroup controls will - * be red. It on its turn can be changed by writing on its own - * control. - */ struct dl_bandwidth { raw_spinlock_t dl_runtime_lock; u64 dl_runtime; @@ -292,6 +267,24 @@ static inline int dl_bandwidth_enabled(void) return sysctl_sched_rt_runtime >= 0; } +/* + * To keep the bandwidth of -deadline tasks under control + * we need some place where: + * - store the maximum -deadline bandwidth of each cpu; + * - cache the fraction of bandwidth that is currently allocated in + * each root domain; + * + * This is all done in the data structure below. It is similar to the + * one used for RT-throttling (rt_bandwidth), with the main difference + * that, since here we are only interested in admission control, we + * do not decrease any runtime while the group "executes", neither we + * need a timer to replenish it. + * + * With respect to SMP, bandwidth is given on a per root domain basis, + * meaning that: + * - bw (< 100%) is the deadline bandwidth of each CPU; + * - total_bw is the currently allocated bandwidth in each root domain; + */ struct dl_bw { raw_spinlock_t lock; u64 bw; @@ -801,6 +794,15 @@ struct root_domain { struct dl_bw dl_bw; struct cpudl cpudl; + /* + * Indicate whether a root_domain's dl_bw has been checked or + * updated. It's monotonously increasing value. + * + * Also, some corner cases, like 'wrap around' is dangerous, but given + * that u64 is 'big enough'. So that shouldn't be a concern. + */ + u64 visit_gen; + #ifdef HAVE_RT_PUSH_IPI /* * For IPI pull requests, loop across the rto_mask. @@ -973,6 +975,7 @@ struct rq { unsigned long cpu_capacity_orig; struct callback_head *balance_callback; + unsigned char balance_flags; unsigned char nohz_idle_balance; unsigned char idle_balance; @@ -1003,6 +1006,10 @@ struct rq { /* This is used to determine avg_idle's max value */ u64 max_idle_balance_cost; + +#ifdef CONFIG_HOTPLUG_CPU + struct rcuwait hotplug_wait; +#endif #endif /* CONFIG_SMP */ #ifdef CONFIG_IRQ_TIME_ACCOUNTING @@ -1048,6 +1055,12 @@ struct rq { /* Must be inspected within a rcu lock section */ struct cpuidle_state *idle_state; #endif + +#if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP) + unsigned int nr_pinned; +#endif + unsigned int push_busy; + struct cpu_stop_work push_work; }; #ifdef CONFIG_FAIR_GROUP_SCHED @@ -1075,6 +1088,16 @@ static inline int cpu_of(struct rq *rq) #endif } +#define MDF_PUSH 0x01 + +static inline bool is_migration_disabled(struct task_struct *p) +{ +#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT) + return p->migration_disabled; +#else + return false; +#endif +} #ifdef CONFIG_SCHED_SMT extern void __update_idle_core(struct rq *rq); @@ -1221,6 +1244,9 @@ static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf) rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); rf->clock_update_flags = 0; #endif +#ifdef CONFIG_SMP + SCHED_WARN_ON(rq->balance_callback); +#endif } static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf) @@ -1382,6 +1408,9 @@ init_numa_balancing(unsigned long clone_flags, struct task_struct *p) #ifdef CONFIG_SMP +#define BALANCE_WORK 0x01 +#define BALANCE_PUSH 0x02 + static inline void queue_balance_callback(struct rq *rq, struct callback_head *head, @@ -1389,12 +1418,13 @@ queue_balance_callback(struct rq *rq, { lockdep_assert_held(&rq->lock); - if (unlikely(head->next)) + if (unlikely(head->next || (rq->balance_flags & BALANCE_PUSH))) return; head->func = (void (*)(struct callback_head *))func; head->next = rq->balance_callback; rq->balance_callback = head; + rq->balance_flags |= BALANCE_WORK; } #define rcu_dereference_check_sched_domain(p) \ @@ -1471,7 +1501,7 @@ struct sched_group_capacity { int id; #endif - unsigned long cpumask[0]; /* Balance mask */ + unsigned long cpumask[]; /* Balance mask */ }; struct sched_group { @@ -1629,7 +1659,7 @@ enum { #undef SCHED_FEAT -#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_JUMP_LABEL) +#ifdef CONFIG_SCHED_DEBUG /* * To support run-time toggling of sched features, all the translation units @@ -1637,6 +1667,7 @@ enum { */ extern const_debug unsigned int sysctl_sched_features; +#ifdef CONFIG_JUMP_LABEL #define SCHED_FEAT(name, enabled) \ static __always_inline bool static_branch_##name(struct static_key *key) \ { \ @@ -1649,7 +1680,13 @@ static __always_inline bool static_branch_##name(struct static_key *key) \ extern struct static_key sched_feat_keys[__SCHED_FEAT_NR]; #define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x])) -#else /* !(SCHED_DEBUG && CONFIG_JUMP_LABEL) */ +#else /* !CONFIG_JUMP_LABEL */ + +#define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x)) + +#endif /* CONFIG_JUMP_LABEL */ + +#else /* !SCHED_DEBUG */ /* * Each translation unit has its own copy of sysctl_sched_features to allow @@ -1665,7 +1702,7 @@ static const_debug __maybe_unused unsigned int sysctl_sched_features = #define sched_feat(x) !!(sysctl_sched_features & (1UL << __SCHED_FEAT_##x)) -#endif /* SCHED_DEBUG && CONFIG_JUMP_LABEL */ +#endif /* SCHED_DEBUG */ extern struct static_key_false sched_numa_balancing; extern struct static_key_false sched_schedstats; @@ -1707,13 +1744,20 @@ static inline int task_on_rq_migrating(struct task_struct *p) return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; } -/* - * wake flags - */ -#define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */ -#define WF_FORK 0x02 /* Child wakeup after fork */ -#define WF_MIGRATED 0x04 /* Internal use, task got migrated */ -#define WF_ON_CPU 0x08 /* Wakee is on_cpu */ +/* Wake flags. The first three directly map to some SD flag value */ +#define WF_EXEC 0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */ +#define WF_FORK 0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */ +#define WF_TTWU 0x08 /* Wakeup; maps to SD_BALANCE_WAKE */ + +#define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */ +#define WF_MIGRATED 0x20 /* Internal use, task got migrated */ +#define WF_ON_CPU 0x40 /* Wakee is on_cpu */ + +#ifdef CONFIG_SMP +static_assert(WF_EXEC == SD_BALANCE_EXEC); +static_assert(WF_FORK == SD_BALANCE_FORK); +static_assert(WF_TTWU == SD_BALANCE_WAKE); +#endif /* * To aid in avoiding the subversion of "niceness" due to uneven distribution @@ -1789,16 +1833,19 @@ struct sched_class { #ifdef CONFIG_SMP int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf); - int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags); + int (*select_task_rq)(struct task_struct *p, int task_cpu, int flags); void (*migrate_task_rq)(struct task_struct *p, int new_cpu); void (*task_woken)(struct rq *this_rq, struct task_struct *task); void (*set_cpus_allowed)(struct task_struct *p, - const struct cpumask *newmask); + const struct cpumask *newmask, + u32 flags); void (*rq_online)(struct rq *rq); void (*rq_offline)(struct rq *rq); + + struct rq *(*find_lock_rq)(struct task_struct *p, struct rq *rq); #endif void (*task_tick)(struct rq *rq, struct task_struct *p, int queued); @@ -1826,7 +1873,7 @@ struct sched_class { #ifdef CONFIG_FAIR_GROUP_SCHED void (*task_change_group)(struct task_struct *p, int type); #endif -} __aligned(STRUCT_ALIGNMENT); /* STRUCT_ALIGN(), vmlinux.lds.h */ +}; static inline void put_prev_task(struct rq *rq, struct task_struct *prev) { @@ -1840,6 +1887,20 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next) next->sched_class->set_next_task(rq, next, false); } + +/* + * Helper to define a sched_class instance; each one is placed in a separate + * section which is ordered by the linker script: + * + * include/asm-generic/vmlinux.lds.h + * + * Also enforce alignment on the instance, not the type, to guarantee layout. + */ +#define DEFINE_SCHED_CLASS(name) \ +const struct sched_class name##_sched_class \ + __aligned(__alignof__(struct sched_class)) \ + __section("__" #name "_sched_class") + /* Defined in include/asm-generic/vmlinux.lds.h */ extern struct sched_class __begin_sched_classes[]; extern struct sched_class __end_sched_classes[]; @@ -1882,13 +1943,35 @@ static inline bool sched_fair_runnable(struct rq *rq) extern struct task_struct *pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf); extern struct task_struct *pick_next_task_idle(struct rq *rq); +#define SCA_CHECK 0x01 +#define SCA_MIGRATE_DISABLE 0x02 +#define SCA_MIGRATE_ENABLE 0x04 + #ifdef CONFIG_SMP extern void update_group_capacity(struct sched_domain *sd, int cpu); extern void trigger_load_balance(struct rq *rq); -extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask); +extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags); + +static inline struct task_struct *get_push_task(struct rq *rq) +{ + struct task_struct *p = rq->curr; + + lockdep_assert_held(&rq->lock); + + if (rq->push_busy) + return NULL; + + if (p->nr_cpus_allowed == 1) + return NULL; + + rq->push_busy = true; + return get_task_struct(p); +} + +extern int push_cpu_stop(void *arg); #endif