* is necessary for the block to be a migration source/target.
*/
do {
- if (pfn_valid_within(pfn)) {
- if (check_source && PageLRU(page)) {
- clear_pageblock_skip(page);
- return true;
- }
+ if (check_source && PageLRU(page)) {
+ clear_pageblock_skip(page);
+ return true;
+ }
- if (check_target && PageBuddy(page)) {
- clear_pageblock_skip(page);
- return true;
- }
+ if (check_target && PageBuddy(page)) {
+ clear_pageblock_skip(page);
+ return true;
}
page += (1 << PAGE_ALLOC_COSTLY_ORDER);
break;
nr_scanned++;
- if (!pfn_valid_within(blockpfn))
- goto isolate_fail;
/*
* For compound pages such as THP and hugetlbfs, we can save
cond_resched();
}
- if (!pfn_valid_within(low_pfn))
- goto isolate_fail;
nr_scanned++;
page = pfn_to_page(low_pfn);
if (!TestClearPageLRU(page))
goto isolate_fail_put;
- lruvec = mem_cgroup_page_lruvec(page, pgdat);
+ lruvec = mem_cgroup_page_lruvec(page);
/* If we already hold the lock, we can skip some rechecking */
if (lruvec != locked) {
if (!list_is_last(freelist, &freepage->lru)) {
list_cut_before(&sublist, freelist, &freepage->lru);
- if (!list_empty(&sublist))
- list_splice_tail(&sublist, freelist);
+ list_splice_tail(&sublist, freelist);
}
}
if (!list_is_first(freelist, &freepage->lru)) {
list_cut_position(&sublist, freelist, &freepage->lru);
- if (!list_empty(&sublist))
- list_splice_tail(&sublist, freelist);
+ list_splice_tail(&sublist, freelist);
}
}
static unsigned long
fast_isolate_freepages(struct compact_control *cc)
{
- unsigned int limit = min(1U, freelist_scan_limit(cc) >> 1);
+ unsigned int limit = max(1U, freelist_scan_limit(cc) >> 1);
unsigned int nr_scanned = 0;
unsigned long low_pfn, min_pfn, highest = 0;
unsigned long nr_isolated = 0;
spin_unlock_irqrestore(&cc->zone->lock, flags);
/*
- * Smaller scan on next order so the total scan ig related
+ * Smaller scan on next order so the total scan is related
* to freelist_scan_limit.
*/
if (order_scanned >= limit)
- limit = min(1U, limit >> 1);
+ limit = max(1U, limit >> 1);
}
if (!page) {
static bool kswapd_is_running(pg_data_t *pgdat)
{
- return pgdat->kswapd && (pgdat->kswapd->state == TASK_RUNNING);
+ return pgdat->kswapd && task_is_running(pgdat->kswapd);
}
/*
err = migrate_pages(&cc->migratepages, compaction_alloc,
compaction_free, (unsigned long)cc, cc->mode,
- MR_COMPACTION);
+ MR_COMPACTION, NULL);
trace_mm_compaction_migratepages(cc->nr_migratepages, err,
&cc->migratepages);
*/
unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
+int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *length, loff_t *ppos)
+{
+ int rc, nid;
+
+ rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
+ if (rc)
+ return rc;
+
+ if (write && sysctl_compaction_proactiveness) {
+ for_each_online_node(nid) {
+ pg_data_t *pgdat = NODE_DATA(nid);
+
+ if (pgdat->proactive_compact_trigger)
+ continue;
+
+ pgdat->proactive_compact_trigger = true;
+ wake_up_interruptible(&pgdat->kcompactd_wait);
+ }
+ }
+
+ return 0;
+}
+
/*
* This is the entry point for compacting all nodes via
* /proc/sys/vm/compact_memory
}
#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
-static ssize_t sysfs_compact_node(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t compact_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
int nid = dev->id;
return count;
}
-static DEVICE_ATTR(compact, 0200, NULL, sysfs_compact_node);
+static DEVICE_ATTR_WO(compact);
int compaction_register_node(struct node *node)
{
static inline bool kcompactd_work_requested(pg_data_t *pgdat)
{
- return pgdat->kcompactd_max_order > 0 || kthread_should_stop();
+ return pgdat->kcompactd_max_order > 0 || kthread_should_stop() ||
+ pgdat->proactive_compact_trigger;
}
static bool kcompactd_node_suitable(pg_data_t *pgdat)
{
pg_data_t *pgdat = (pg_data_t *)p;
struct task_struct *tsk = current;
- unsigned int proactive_defer = 0;
+ long default_timeout = msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC);
+ long timeout = default_timeout;
const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
while (!kthread_should_stop()) {
unsigned long pflags;
+ /*
+ * Avoid the unnecessary wakeup for proactive compaction
+ * when it is disabled.
+ */
+ if (!sysctl_compaction_proactiveness)
+ timeout = MAX_SCHEDULE_TIMEOUT;
trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
if (wait_event_freezable_timeout(pgdat->kcompactd_wait,
- kcompactd_work_requested(pgdat),
- msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC))) {
+ kcompactd_work_requested(pgdat), timeout) &&
+ !pgdat->proactive_compact_trigger) {
psi_memstall_enter(&pflags);
kcompactd_do_work(pgdat);
psi_memstall_leave(&pflags);
+ /*
+ * Reset the timeout value. The defer timeout from
+ * proactive compaction is lost here but that is fine
+ * as the condition of the zone changing substantially
+ * then carrying on with the previous defer interval is
+ * not useful.
+ */
+ timeout = default_timeout;
continue;
}
- /* kcompactd wait timeout */
+ /*
+ * Start the proactive work with default timeout. Based
+ * on the fragmentation score, this timeout is updated.
+ */
+ timeout = default_timeout;
if (should_proactive_compact_node(pgdat)) {
unsigned int prev_score, score;
- if (proactive_defer) {
- proactive_defer--;
- continue;
- }
prev_score = fragmentation_score_node(pgdat);
proactive_compact_node(pgdat);
score = fragmentation_score_node(pgdat);
* Defer proactive compaction if the fragmentation
* score did not go down i.e. no progress made.
*/
- proactive_defer = score < prev_score ?
- 0 : 1 << COMPACT_MAX_DEFER_SHIFT;
+ if (unlikely(score >= prev_score))
+ timeout =
+ default_timeout << COMPACT_MAX_DEFER_SHIFT;
}
+ if (unlikely(pgdat->proactive_compact_trigger))
+ pgdat->proactive_compact_trigger = false;
}
return 0;