Merge branch 'misc.namei' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
diff --git a/mm/compaction.c b/mm/compaction.c
index 84fde27..bfc93da 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -306,16 +306,14 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
         * is necessary for the block to be a migration source/target.
         */
        do {
-               if (pfn_valid_within(pfn)) {
-                       if (check_source && PageLRU(page)) {
-                               clear_pageblock_skip(page);
-                               return true;
-                       }
+               if (check_source && PageLRU(page)) {
+                       clear_pageblock_skip(page);
+                       return true;
+               }
 
-                       if (check_target && PageBuddy(page)) {
-                               clear_pageblock_skip(page);
-                               return true;
-                       }
+               if (check_target && PageBuddy(page)) {
+                       clear_pageblock_skip(page);
+                       return true;
                }
 
                page += (1 << PAGE_ALLOC_COSTLY_ORDER);
@@ -585,8 +583,6 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
                        break;
 
                nr_scanned++;
-               if (!pfn_valid_within(blockpfn))
-                       goto isolate_fail;
 
                /*
                 * For compound pages such as THP and hugetlbfs, we can save
@@ -885,8 +881,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                        cond_resched();
                }
 
-               if (!pfn_valid_within(low_pfn))
-                       goto isolate_fail;
                nr_scanned++;
 
                page = pfn_to_page(low_pfn);
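
Note: the three hunks above all drop pfn_valid_within(). The helper was
removed tree-wide together with CONFIG_HOLES_IN_ZONE, on the grounds that a
memory hole can no longer occur inside a pageblock, making the per-pfn check
within a block dead weight. For reference, the removed helper was defined in
include/linux/mmzone.h roughly as:

    #ifdef CONFIG_HOLES_IN_ZONE
    #define pfn_valid_within(pfn)   pfn_valid(pfn)
    #else
    #define pfn_valid_within(pfn)   (1)
    #endif

so on configurations without CONFIG_HOLES_IN_ZONE it already compiled to a
constant 1 and these branches were no-ops.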
@@ -1028,7 +1022,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                if (!TestClearPageLRU(page))
                        goto isolate_fail_put;
 
-               lruvec = mem_cgroup_page_lruvec(page, pgdat);
+               lruvec = mem_cgroup_page_lruvec(page);
 
                /* If we already hold the lock, we can skip some rechecking */
                if (lruvec != locked) {
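
The dropped pgdat argument is redundant: the helper can derive the node from
the page itself. A minimal sketch of the simplified lookup, assuming the
usual page_pgdat()/page_memcg() accessors (the authoritative body lives in
include/linux/memcontrol.h):

    static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
    {
            pg_data_t *pgdat = page_pgdat(page);

            return mem_cgroup_lruvec(page_memcg(page), pgdat);
    }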
@@ -1297,8 +1291,7 @@ move_freelist_head(struct list_head *freelist, struct page *freepage)
 
        if (!list_is_last(freelist, &freepage->lru)) {
                list_cut_before(&sublist, freelist, &freepage->lru);
-               if (!list_empty(&sublist))
-                       list_splice_tail(&sublist, freelist);
+               list_splice_tail(&sublist, freelist);
        }
 }
 
@@ -1315,8 +1308,7 @@ move_freelist_tail(struct list_head *freelist, struct page *freepage)
 
        if (!list_is_first(freelist, &freepage->lru)) {
                list_cut_position(&sublist, freelist, &freepage->lru);
-               if (!list_empty(&sublist))
-                       list_splice_tail(&sublist, freelist);
+               list_splice_tail(&sublist, freelist);
        }
 }
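
The list_empty() checks removed from move_freelist_head() and
move_freelist_tail() were redundant because list_splice_tail() already
tolerates an empty source list; from include/linux/list.h:

    static inline void list_splice_tail(struct list_head *list,
                                    struct list_head *head)
    {
            if (!list_empty(list))
                    __list_splice(list, head->prev, head);
    }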
 
@@ -1380,7 +1372,7 @@ static int next_search_order(struct compact_control *cc, int order)
 static unsigned long
 fast_isolate_freepages(struct compact_control *cc)
 {
-       unsigned int limit = min(1U, freelist_scan_limit(cc) >> 1);
+       unsigned int limit = max(1U, freelist_scan_limit(cc) >> 1);
        unsigned int nr_scanned = 0;
        unsigned long low_pfn, min_pfn, highest = 0;
        unsigned long nr_isolated = 0;
@@ -1492,11 +1484,11 @@ fast_isolate_freepages(struct compact_control *cc)
                spin_unlock_irqrestore(&cc->zone->lock, flags);
 
                /*
-                * Smaller scan on next order so the total scan ig related
+                * Smaller scan on next order so the total scan is related
                 * to freelist_scan_limit.
                 */
                if (order_scanned >= limit)
-                       limit = min(1U, limit >> 1);
+                       limit = max(1U, limit >> 1);
        }
 
        if (!page) {
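
The min()/max() swap in both hunks fixes an inverted clamp. Worked through
with an assumed freelist_scan_limit() of 64:

    limit = min(1U, 64 >> 1);      /* old: 1  - scan budget collapses  */
    limit = max(1U, 64 >> 1);      /* new: 32 - halves per order ...   */
    limit = max(1U, limit >> 1);   /* ... 16, 8, 4, 2, 1, 1, never 0   */

The intent was always a per-order budget that shrinks but never drops below
one; min() instead pinned it to at most 1 and then to 0.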
@@ -1955,7 +1947,7 @@ static inline bool is_via_compact_memory(int order)
 
 static bool kswapd_is_running(pg_data_t *pgdat)
 {
-       return pgdat->kswapd && (pgdat->kswapd->state == TASK_RUNNING);
+       return pgdat->kswapd && task_is_running(pgdat->kswapd);
 }
 
 /*
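
task_is_running() is the new accessor replacing open-coded reads of the task
state (the field itself was renamed from state to __state in the same
series). Its definition in include/linux/sched.h is essentially:

    #define task_is_running(task)   (READ_ONCE((task)->__state) == TASK_RUNNING)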
@@ -2400,7 +2392,7 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
 
                err = migrate_pages(&cc->migratepages, compaction_alloc,
                                compaction_free, (unsigned long)cc, cc->mode,
-                               MR_COMPACTION);
+                               MR_COMPACTION, NULL);
 
                trace_mm_compaction_migratepages(cc->nr_migratepages, err,
                                                        &cc->migratepages);
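
migrate_pages() gained a trailing output parameter for the precise count of
successfully migrated pages; passing NULL, as here, keeps the old behaviour.
The updated prototype looks approximately like this (parameter name per the
migrate_pages() series; check include/linux/migrate.h in your tree):

    int migrate_pages(struct list_head *from, new_page_t get_new_page,
                    free_page_t put_new_page, unsigned long private,
                    enum migrate_mode mode, int reason,
                    unsigned int *ret_succeeded);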
@@ -2708,6 +2700,30 @@ static void compact_nodes(void)
  */
 unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
 
+int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write,
+               void *buffer, size_t *length, loff_t *ppos)
+{
+       int rc, nid;
+
+       rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
+       if (rc)
+               return rc;
+
+       if (write && sysctl_compaction_proactiveness) {
+               for_each_online_node(nid) {
+                       pg_data_t *pgdat = NODE_DATA(nid);
+
+                       if (pgdat->proactive_compact_trigger)
+                               continue;
+
+                       pgdat->proactive_compact_trigger = true;
+                       wake_up_interruptible(&pgdat->kcompactd_wait);
+               }
+       }
+
+       return 0;
+}
+
 /*
  * This is the entry point for compacting all nodes via
  * /proc/sys/vm/compact_memory
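
The new handler only takes effect once referenced from the vm table in
kernel/sysctl.c; a sketch of the matching entry, with the bounds fields
assumed from the usual vm.* conventions (0..100):

    {
            .procname       = "compaction_proactiveness",
            .data           = &sysctl_compaction_proactiveness,
            .maxlen         = sizeof(sysctl_compaction_proactiveness),
            .mode           = 0644,
            .proc_handler   = compaction_proactiveness_sysctl_handler,
            .extra1         = SYSCTL_ZERO,
            .extra2         = &one_hundred,
    },

With that wired up, writing a non-zero value (e.g. sysctl -w
vm.compaction_proactiveness=20) immediately wakes kcompactd on every online
node rather than waiting out the current sleep.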
@@ -2722,9 +2738,9 @@ int sysctl_compaction_handler(struct ctl_table *table, int write,
 }
 
 #if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
-static ssize_t sysfs_compact_node(struct device *dev,
-                       struct device_attribute *attr,
-                       const char *buf, size_t count)
+static ssize_t compact_store(struct device *dev,
+                            struct device_attribute *attr,
+                            const char *buf, size_t count)
 {
        int nid = dev->id;
 
@@ -2737,7 +2753,7 @@ static ssize_t sysfs_compact_node(struct device *dev,
 
        return count;
 }
-static DEVICE_ATTR(compact, 0200, NULL, sysfs_compact_node);
+static DEVICE_ATTR_WO(compact);
 
 int compaction_register_node(struct node *node)
 {
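
The rename from sysfs_compact_node() to compact_store() is what makes
DEVICE_ATTR_WO() usable: the macro stringifies its argument and wires up
<name>_store with write-only (0200) permissions, expanding roughly to:

    static struct device_attribute dev_attr_compact = {
            .attr   = { .name = "compact", .mode = 0200 },
            .store  = compact_store,
    };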
@@ -2752,7 +2768,8 @@ void compaction_unregister_node(struct node *node)
 
 static inline bool kcompactd_work_requested(pg_data_t *pgdat)
 {
-       return pgdat->kcompactd_max_order > 0 || kthread_should_stop();
+       return pgdat->kcompactd_max_order > 0 || kthread_should_stop() ||
+               pgdat->proactive_compact_trigger;
 }
 
 static bool kcompactd_node_suitable(pg_data_t *pgdat)
@@ -2887,7 +2904,8 @@ static int kcompactd(void *p)
 {
        pg_data_t *pgdat = (pg_data_t *)p;
        struct task_struct *tsk = current;
-       unsigned int proactive_defer = 0;
+       long default_timeout = msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC);
+       long timeout = default_timeout;
 
        const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
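
default_timeout picks up the periodic fragmentation-check interval that
mm/compaction.c already defines for proactive compaction; in this series it
is a 500 ms constant along the lines of:

    /* Fragmentation score check interval for proactive compaction. */
    static const unsigned int HPAGE_FRAG_CHECK_INTERVAL_MSEC = 500;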
 
@@ -2902,25 +2920,39 @@ static int kcompactd(void *p)
        while (!kthread_should_stop()) {
                unsigned long pflags;
 
+               /*
+                * Avoid unnecessary wakeups for proactive compaction
+                * when it is disabled.
+                */
+               if (!sysctl_compaction_proactiveness)
+                       timeout = MAX_SCHEDULE_TIMEOUT;
                trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
                if (wait_event_freezable_timeout(pgdat->kcompactd_wait,
-                       kcompactd_work_requested(pgdat),
-                       msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC))) {
+                       kcompactd_work_requested(pgdat), timeout) &&
+                       !pgdat->proactive_compact_trigger) {
 
                        psi_memstall_enter(&pflags);
                        kcompactd_do_work(pgdat);
                        psi_memstall_leave(&pflags);
+                       /*
+                        * Reset the timeout value. The defer timeout from
+                        * proactive compaction is lost here but that is fine
+                        * as, once the zone has changed substantially,
+                        * carrying on with the previous defer interval is
+                        * not useful.
+                        */
+                       timeout = default_timeout;
                        continue;
                }
 
-               /* kcompactd wait timeout */
+               /*
+                * Start the proactive work with the default timeout; it
+                * is updated below based on the fragmentation score.
+                */
+               timeout = default_timeout;
                if (should_proactive_compact_node(pgdat)) {
                        unsigned int prev_score, score;
 
-                       if (proactive_defer) {
-                               proactive_defer--;
-                               continue;
-                       }
                        prev_score = fragmentation_score_node(pgdat);
                        proactive_compact_node(pgdat);
                        score = fragmentation_score_node(pgdat);
@@ -2928,9 +2960,12 @@ static int kcompactd(void *p)
                         * Defer proactive compaction if the fragmentation
                         * score did not go down i.e. no progress made.
                         */
-                       proactive_defer = score < prev_score ?
-                                       0 : 1 << COMPACT_MAX_DEFER_SHIFT;
+                       if (unlikely(score >= prev_score))
+                               timeout =
+                                  default_timeout << COMPACT_MAX_DEFER_SHIFT;
                }
+               if (unlikely(pgdat->proactive_compact_trigger))
+                       pgdat->proactive_compact_trigger = false;
        }
 
        return 0;
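
Net effect of the last two hunks: the proactive_defer counter is gone and
back-off is expressed through the sleep length itself. With the 500 ms
interval above and COMPACT_MAX_DEFER_SHIFT of 6 (include/linux/compaction.h),
an unproductive proactive pass stretches the sleep to

    500 ms << 6 = 32000 ms

i.e. one proactive attempt roughly every 32 seconds until the fragmentation
score starts improving again, at which point the timeout resets to the
500 ms default.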