Merge branch 'misc.namei' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
diff --git a/mm/compaction.c b/mm/compaction.c
index 84fde27..bfc93da 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -306,16 +306,14 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
         * is necessary for the block to be a migration source/target.
         */
        do {
-               if (pfn_valid_within(pfn)) {
-                       if (check_source && PageLRU(page)) {
-                               clear_pageblock_skip(page);
-                               return true;
-                       }
+               if (check_source && PageLRU(page)) {
+                       clear_pageblock_skip(page);
+                       return true;
+               }
 
-                       if (check_target && PageBuddy(page)) {
-                               clear_pageblock_skip(page);
-                               return true;
-                       }
+               if (check_target && PageBuddy(page)) {
+                       clear_pageblock_skip(page);
+                       return true;
                }
 
                page += (1 << PAGE_ALLOC_COSTLY_ORDER);
@@ -585,8 +583,6 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
                        break;
 
                nr_scanned++;
-               if (!pfn_valid_within(blockpfn))
-                       goto isolate_fail;
 
                /*
                 * For compound pages such as THP and hugetlbfs, we can save
@@ -885,8 +881,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                        cond_resched();
                }
 
-               if (!pfn_valid_within(low_pfn))
-                       goto isolate_fail;
                nr_scanned++;
 
                page = pfn_to_page(low_pfn);
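
Note: the three hunks above all drop pfn_valid_within(). The helper was
removed tree-wide together with CONFIG_HOLES_IN_ZONE, on the grounds that a
memory hole can no longer occur inside a pageblock, making the per-pfn check
within a block dead weight. For reference, the removed helper was defined in
include/linux/mmzone.h roughly as:

    #ifdef CONFIG_HOLES_IN_ZONE
    #define pfn_valid_within(pfn)   pfn_valid(pfn)
    #else
    #define pfn_valid_within(pfn)   (1)
    #endif

so on configurations without CONFIG_HOLES_IN_ZONE it already compiled to a
constant 1 and these branches were no-ops.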
@@ -1028,7 +1022,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                if (!TestClearPageLRU(page))
                        goto isolate_fail_put;
 
-               lruvec = mem_cgroup_page_lruvec(page, pgdat);
+               lruvec = mem_cgroup_page_lruvec(page);
 
                /* If we already hold the lock, we can skip some rechecking */
                if (lruvec != locked) {
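
The dropped pgdat argument is redundant: the helper can derive the node from
the page itself. A minimal sketch of the simplified lookup, assuming the
usual page_pgdat()/page_memcg() accessors (the authoritative body lives in
include/linux/memcontrol.h):

    static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
    {
            pg_data_t *pgdat = page_pgdat(page);

            return mem_cgroup_lruvec(page_memcg(page), pgdat);
    }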
@@ -1297,8 +1291,7 @@ move_freelist_head(struct list_head *freelist, struct page *freepage)
 
        if (!list_is_last(freelist, &freepage->lru)) {
                list_cut_before(&sublist, freelist, &freepage->lru);
-               if (!list_empty(&sublist))
-                       list_splice_tail(&sublist, freelist);
+               list_splice_tail(&sublist, freelist);
        }
 }
 
@@ -1315,8 +1308,7 @@ move_freelist_tail(struct list_head *freelist, struct page *freepage)
 
        if (!list_is_first(freelist, &freepage->lru)) {
                list_cut_position(&sublist, freelist, &freepage->lru);
-               if (!list_empty(&sublist))
-                       list_splice_tail(&sublist, freelist);
+               list_splice_tail(&sublist, freelist);
        }
 }
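
The list_empty() checks removed from move_freelist_head() and
move_freelist_tail() were redundant because list_splice_tail() already
tolerates an empty source list; from include/linux/list.h:

    static inline void list_splice_tail(struct list_head *list,
                                    struct list_head *head)
    {
            if (!list_empty(list))
                    __list_splice(list, head->prev, head);
    }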
 
@@ -1380,7 +1372,7 @@ static int next_search_order(struct compact_control *cc, int order)
 static unsigned long
 fast_isolate_freepages(struct compact_control *cc)
 {
-       unsigned int limit = min(1U, freelist_scan_limit(cc) >> 1);
+       unsigned int limit = max(1U, freelist_scan_limit(cc) >> 1);
        unsigned int nr_scanned = 0;
        unsigned long low_pfn, min_pfn, highest = 0;
        unsigned long nr_isolated = 0;
@@ -1492,11 +1484,11 @@ fast_isolate_freepages(struct compact_control *cc)
                spin_unlock_irqrestore(&cc->zone->lock, flags);
 
                /*
-                * Smaller scan on next order so the total scan ig related
+                * Smaller scan on next order so the total scan is related
                 * to freelist_scan_limit.
                 */
                if (order_scanned >= limit)
-                       limit = min(1U, limit >> 1);
+                       limit = max(1U, limit >> 1);
        }
 
        if (!page) {
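
The min()/max() swap in both hunks fixes an inverted clamp. Worked through
with an assumed freelist_scan_limit() of 64:

    limit = min(1U, 64 >> 1);      /* old: 1  - scan budget collapses  */
    limit = max(1U, 64 >> 1);      /* new: 32 - halves per order ...   */
    limit = max(1U, limit >> 1);   /* ... 16, 8, 4, 2, 1, 1, never 0   */

The intent was always a per-order budget that shrinks but never drops below
one; min() instead pinned it to at most 1 and then to 0.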
@@ -1955,7 +1947,7 @@ static inline bool is_via_compact_memory(int order)
 
 static bool kswapd_is_running(pg_data_t *pgdat)
 {
-       return pgdat->kswapd && (pgdat->kswapd->state == TASK_RUNNING);
+       return pgdat->kswapd && task_is_running(pgdat->kswapd);
 }
 
 /*
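
task_is_running() is the new accessor replacing open-coded reads of the task
state (the field itself was renamed from state to __state in the same
series). Its definition in include/linux/sched.h is essentially:

    #define task_is_running(task)   (READ_ONCE((task)->__state) == TASK_RUNNING)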
@@ -2400,7 +2392,7 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
 
                err = migrate_pages(&cc->migratepages, compaction_alloc,
                                compaction_free, (unsigned long)cc, cc->mode,
-                               MR_COMPACTION);
+                               MR_COMPACTION, NULL);
 
                trace_mm_compaction_migratepages(cc->nr_migratepages, err,
                                                        &cc->migratepages);
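
migrate_pages() gained a trailing output parameter for the precise count of
successfully migrated pages; passing NULL, as here, keeps the old behaviour.
The updated prototype looks approximately like this (parameter name per the
migrate_pages() series; check include/linux/migrate.h in your tree):

    int migrate_pages(struct list_head *from, new_page_t get_new_page,
                    free_page_t put_new_page, unsigned long private,
                    enum migrate_mode mode, int reason,
                    unsigned int *ret_succeeded);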
@@ -2708,6 +2700,30 @@ static void compact_nodes(void)
  */
 unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
 
+int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write,
+               void *buffer, size_t *length, loff_t *ppos)
+{
+       int rc, nid;
+
+       rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
+       if (rc)
+               return rc;
+
+       if (write && sysctl_compaction_proactiveness) {
+               for_each_online_node(nid) {
+                       pg_data_t *pgdat = NODE_DATA(nid);
+
+                       if (pgdat->proactive_compact_trigger)
+                               continue;
+
+                       pgdat->proactive_compact_trigger = true;
+                       wake_up_interruptible(&pgdat->kcompactd_wait);
+               }
+       }
+
+       return 0;
+}
+
 /*
  * This is the entry point for compacting all nodes via
  * /proc/sys/vm/compact_memory
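
The new handler only takes effect once referenced from the vm table in
kernel/sysctl.c; a sketch of the matching entry, with the bounds fields
assumed from the usual vm.* conventions (0..100):

    {
            .procname       = "compaction_proactiveness",
            .data           = &sysctl_compaction_proactiveness,
            .maxlen         = sizeof(sysctl_compaction_proactiveness),
            .mode           = 0644,
            .proc_handler   = compaction_proactiveness_sysctl_handler,
            .extra1         = SYSCTL_ZERO,
            .extra2         = &one_hundred,
    },

With that wired up, writing a non-zero value (e.g. sysctl -w
vm.compaction_proactiveness=20) immediately wakes kcompactd on every online
node rather than waiting out the current sleep.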
@@ -2722,9 +2738,9 @@ int sysctl_compaction_handler(struct ctl_table *table, int write,
 }
 
 #if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
-static ssize_t sysfs_compact_node(struct device *dev,
-                       struct device_attribute *attr,
-                       const char *buf, size_t count)
+static ssize_t compact_store(struct device *dev,
+                            struct device_attribute *attr,
+                            const char *buf, size_t count)
 {
        int nid = dev->id;
 
@@ -2737,7 +2753,7 @@ static ssize_t sysfs_compact_node(struct device *dev,
 
        return count;
 }
-static DEVICE_ATTR(compact, 0200, NULL, sysfs_compact_node);
+static DEVICE_ATTR_WO(compact);
 
 int compaction_register_node(struct node *node)
 {
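
The rename from sysfs_compact_node() to compact_store() is what makes
DEVICE_ATTR_WO() usable: the macro stringifies its argument and wires up
<name>_store with write-only (0200) permissions, expanding roughly to:

    static struct device_attribute dev_attr_compact = {
            .attr   = { .name = "compact", .mode = 0200 },
            .store  = compact_store,
    };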
@@ -2752,7 +2768,8 @@ void compaction_unregister_node(struct node *node)
 
 static inline bool kcompactd_work_requested(pg_data_t *pgdat)
 {
-       return pgdat->kcompactd_max_order > 0 || kthread_should_stop();
+       return pgdat->kcompactd_max_order > 0 || kthread_should_stop() ||
+               pgdat->proactive_compact_trigger;
 }
 
 static bool kcompactd_node_suitable(pg_data_t *pgdat)
@@ -2887,7 +2904,8 @@ static int kcompactd(void *p)
 {
        pg_data_t *pgdat = (pg_data_t *)p;
        struct task_struct *tsk = current;
-       unsigned int proactive_defer = 0;
+       long default_timeout = msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC);
+       long timeout = default_timeout;
 
        const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
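
default_timeout picks up the periodic fragmentation-check interval that
mm/compaction.c already defines for proactive compaction; in this series it
is a 500 ms constant along the lines of:

    /* Fragmentation score check interval for proactive compaction. */
    static const unsigned int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 500;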
 
@@ -2902,25 +2920,39 @@ static int kcompactd(void *p)
        while (!kthread_should_stop()) {
                unsigned long pflags;
 
+               /*
+                * Avoid unnecessary wakeups for proactive compaction
+                * when it is disabled.
+                */
+               if (!sysctl_compaction_proactiveness)
+                       timeout = MAX_SCHEDULE_TIMEOUT;
                trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
                if (wait_event_freezable_timeout(pgdat->kcompactd_wait,
-                       kcompactd_work_requested(pgdat),
-                       msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC))) {
+                       kcompactd_work_requested(pgdat), timeout) &&
+                       !pgdat->proactive_compact_trigger) {
 
                        psi_memstall_enter(&pflags);
                        kcompactd_do_work(pgdat);
                        psi_memstall_leave(&pflags);
+                       /*
+                        * Reset the timeout value. The defer timeout from
+                        * proactive compaction is lost here but that is fine
+                        * as, once the zone has changed substantially,
+                        * carrying on with the previous defer interval is
+                        * not useful.
+                        */
+                       timeout = default_timeout;
                        continue;
                }
 
-               /* kcompactd wait timeout */
+               /*
+                * Start the proactive work with the default timeout; it
+                * is updated below based on the fragmentation score.
+                */
+               timeout = default_timeout;
                if (should_proactive_compact_node(pgdat)) {
                        unsigned int prev_score, score;
 
-                       if (proactive_defer) {
-                               proactive_defer--;
-                               continue;
-                       }
                        prev_score = fragmentation_score_node(pgdat);
                        proactive_compact_node(pgdat);
                        score = fragmentation_score_node(pgdat);
@@ -2928,9 +2960,12 @@ static int kcompactd(void *p)
                         * Defer proactive compaction if the fragmentation
                         * score did not go down i.e. no progress made.
                         */
-                       proactive_defer = score < prev_score ?
-                                       0 : 1 << COMPACT_MAX_DEFER_SHIFT;
+                       if (unlikely(score >= prev_score))
+                               timeout =
+                                  default_timeout << COMPACT_MAX_DEFER_SHIFT;
                }
+               if (unlikely(pgdat->proactive_compact_trigger))
+                       pgdat->proactive_compact_trigger = false;
        }
 
        return 0;
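
Net effect of the last two hunks: the proactive_defer counter is gone and
back-off is expressed through the sleep length itself. With the 500 ms
interval above and COMPACT_MAX_DEFER_SHIFT of 6 (include/linux/compaction.h),
an unproductive proactive pass stretches the sleep to

    500 ms << 6 = 32000 ms

i.e. one proactive attempt roughly every 32 seconds until the fragmentation
score starts improving again, at which point the timeout resets to the
500 ms default.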