diff --git a/mm/compaction.c b/mm/compaction.c
index e04f447..84fde27 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -787,15 +787,14 @@ static bool too_many_isolated(pg_data_t *pgdat)
  *
  * Isolate all pages that can be migrated from the range specified by
  * [low_pfn, end_pfn). The range is expected to be within same pageblock.
- * Returns zero if there is a fatal signal pending, otherwise PFN of the
- * first page that was not scanned (which may be both less, equal to or more
- * than end_pfn).
+ * Returns errno, like -EAGAIN or -EINTR in case of e.g. a pending signal or
+ * congestion, -ENOMEM in case we could not allocate a page, or 0.
+ * cc->migrate_pfn will contain the next pfn to scan.
  *
  * The pages are isolated on cc->migratepages list (not required to be empty),
- * and cc->nr_migratepages is updated accordingly. The cc->migrate_pfn field
- * is neither read nor updated.
+ * and cc->nr_migratepages is updated accordingly.
  */
-static unsigned long
+static int
 isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                        unsigned long end_pfn, isolate_mode_t isolate_mode)
 {
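
[ A minimal user-space model of the new calling convention, for review
  purposes only; the types are stubbed and the names are hypothetical.
  The point is that status now travels as an errno-style int while scan
  progress is published through cc->migrate_pfn, instead of both being
  packed into a single unsigned long return value. ]

#include <stdio.h>

/* Stub of the two compact_control fields the new contract touches. */
struct compact_control {
	unsigned long migrate_pfn;	/* next pfn to scan, set by the callee */
	unsigned long nr_migratepages;
};

/* Hypothetical stand-in for isolate_migratepages_block(). */
static int isolate_block_model(struct compact_control *cc,
			       unsigned long low_pfn, unsigned long end_pfn)
{
	cc->migrate_pfn = low_pfn;	/* published even on an early error */
	/* ... scan pages; -EINTR on fatal signal, -EAGAIN on congestion ... */
	cc->migrate_pfn = end_pfn;	/* progress recorded on normal exit */
	return 0;
}

int main(void)
{
	struct compact_control cc = { 0, 0 };
	int ret = isolate_block_model(&cc, 0x1000, 0x1200);

	if (ret)	/* -EAGAIN, -EINTR or -ENOMEM: abort this pass */
		fprintf(stderr, "isolation failed: %d\n", ret);
	else		/* 0: resume scanning at cc.migrate_pfn */
		printf("next pfn to scan: 0x%lx\n", cc.migrate_pfn);
	return 0;
}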
@@ -809,6 +808,9 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
        bool skip_on_failure = false;
        unsigned long next_skip_pfn = 0;
        bool skip_updated = false;
+       int ret = 0;
+
+       cc->migrate_pfn = low_pfn;
 
        /*
         * Ensure that there are not too many pages isolated from the LRU
@@ -818,16 +820,16 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
        while (unlikely(too_many_isolated(pgdat))) {
                /* stop isolation if there are still pages not migrated */
                if (cc->nr_migratepages)
-                       return 0;
+                       return -EAGAIN;
 
                /* async migration should just abort */
                if (cc->mode == MIGRATE_ASYNC)
-                       return 0;
+                       return -EAGAIN;
 
                congestion_wait(BLK_RW_ASYNC, HZ/10);
 
                if (fatal_signal_pending(current))
-                       return 0;
+                       return -EINTR;
        }
 
        cond_resched();
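
[ Sketch of the throttle semantics above as a plain C model; the kernel
  helpers are stubbed and the names are made up. Requests that already
  hold isolated pages, and async requests that must not sleep, bail out
  with the retryable -EAGAIN, while a fatal signal becomes -EINTR. ]

#include <errno.h>
#include <stdbool.h>

enum migrate_mode_model { MODEL_ASYNC, MODEL_SYNC };

/* Stubs standing in for too_many_isolated(), congestion_wait() and
 * fatal_signal_pending(). */
static bool too_many_isolated_stub(void) { return false; }
static void congestion_wait_stub(void) { }
static bool fatal_signal_pending_stub(void) { return false; }

static int throttle_model(enum migrate_mode_model mode,
			  unsigned long nr_migratepages)
{
	while (too_many_isolated_stub()) {
		/* Don't pile more isolated pages onto a stalled request. */
		if (nr_migratepages)
			return -EAGAIN;
		/* Async compaction must never sleep: retryable error. */
		if (mode == MODEL_ASYNC)
			return -EAGAIN;
		congestion_wait_stub();
		/* A fatal signal is not retryable. */
		if (fatal_signal_pending_stub())
			return -EINTR;
	}
	return 0;	/* isolation may proceed */
}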
@@ -875,8 +877,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 
                        if (fatal_signal_pending(current)) {
                                cc->contended = true;
+                               ret = -EINTR;
 
-                               low_pfn = 0;
                                goto fatal_pending;
                        }
 
@@ -904,6 +906,38 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                        valid_page = page;
                }
 
+               if (PageHuge(page) && cc->alloc_contig) {
+                       ret = isolate_or_dissolve_huge_page(page, &cc->migratepages);
+
+                       /*
+                        * Fail isolation in case isolate_or_dissolve_huge_page()
+                        * reports an error. In case of -ENOMEM, abort right away.
+                        */
+                       if (ret < 0) {
+                               /* Do not report -EBUSY down the chain */
+                               if (ret == -EBUSY)
+                                       ret = 0;
+                               low_pfn += (1UL << compound_order(page)) - 1;
+                               goto isolate_fail;
+                       }
+
+                       if (PageHuge(page)) {
+                               /*
+                                * Hugepage was successfully isolated and placed
+                                * on the cc->migratepages list.
+                                */
+                               low_pfn += compound_nr(page) - 1;
+                               goto isolate_success_no_list;
+                       }
+
+                       /*
+                        * Ok, the hugepage was dissolved. Now these pages are
+                        * Buddy and cannot be re-allocated because they are
+                        * isolated. Fall-through as the check below handles
+                        * Buddy pages.
+                        */
+               }
+
                /*
                 * Skip if free. We read page order here without zone lock
                 * which is generally unsafe, but the race window is small and
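
[ The hugetlb error mapping above, condensed into a small model;
  isolate_or_dissolve_huge_page() and PageHuge() are stubbed. The key
  move is masking -EBUSY to 0 so a transiently busy hugepage only skips
  itself, while -ENOMEM propagates and aborts the whole scan. ]

#include <errno.h>
#include <stdbool.h>

/* Stub: 0 = isolated or dissolved, -EBUSY = busy right now, -ENOMEM = fatal. */
static int isolate_or_dissolve_stub(void) { return 0; }
static bool page_still_huge_stub(void) { return true; }

/* What the scan loop does for one hugepage; returns 0 or -ENOMEM. */
static int handle_hugepage_model(unsigned long *low_pfn, unsigned long nr_pages)
{
	int ret = isolate_or_dissolve_stub();

	if (ret < 0) {
		if (ret == -EBUSY)
			ret = 0;		/* mask: skip this page, keep scanning */
		*low_pfn += nr_pages - 1;	/* jump past the hugepage */
		return ret;			/* -ENOMEM still aborts the scan */
	}
	if (page_still_huge_stub()) {
		/* Isolated whole: it already sits on cc->migratepages. */
		*low_pfn += nr_pages - 1;
		return 0;
	}
	/* Dissolved into isolated buddy pages: rescan them as normal pages. */
	return 0;
}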
@@ -1037,6 +1071,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 
 isolate_success:
                list_add(&page->lru, &cc->migratepages);
+isolate_success_no_list:
                cc->nr_migratepages += compound_nr(page);
                nr_isolated += compound_nr(page);
 
@@ -1063,7 +1098,7 @@ isolate_fail_put:
                put_page(page);
 
 isolate_fail:
-               if (!skip_on_failure)
+               if (!skip_on_failure && ret != -ENOMEM)
                        continue;
 
                /*
@@ -1089,6 +1124,9 @@ isolate_fail:
                         */
                        next_skip_pfn += 1UL << cc->order;
                }
+
+               if (ret == -ENOMEM)
+                       break;
        }
 
        /*
@@ -1130,7 +1168,9 @@ fatal_pending:
        if (nr_isolated)
                count_compact_events(COMPACTISOLATED, nr_isolated);
 
-       return low_pfn;
+       cc->migrate_pfn = low_pfn;
+
+       return ret;
 }
 
 /**
@@ -1139,15 +1179,15 @@ fatal_pending:
  * @start_pfn: The first PFN to start isolating.
  * @end_pfn:   The one-past-last PFN.
  *
- * Returns zero if isolation fails fatally due to e.g. pending signal.
- * Otherwise, function returns one-past-the-last PFN of isolated page
- * (which may be greater than end_pfn if end fell in a middle of a THP page).
+ * Returns -EAGAIN when contended, -EINTR in case of a pending signal, -ENOMEM
+ * in case we could not allocate a page, or 0.
  */
-unsigned long
+int
 isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
                                                        unsigned long end_pfn)
 {
        unsigned long pfn, block_start_pfn, block_end_pfn;
+       int ret = 0;
 
        /* Scan block by block. First and last block may be incomplete */
        pfn = start_pfn;
@@ -1166,17 +1206,17 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
                                        block_end_pfn, cc->zone))
                        continue;
 
-               pfn = isolate_migratepages_block(cc, pfn, block_end_pfn,
-                                                       ISOLATE_UNEVICTABLE);
+               ret = isolate_migratepages_block(cc, pfn, block_end_pfn,
+                                                ISOLATE_UNEVICTABLE);
 
-               if (!pfn)
+               if (ret)
                        break;
 
                if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX)
                        break;
        }
 
-       return pfn;
+       return ret;
 }
 
 #endif /* CONFIG_COMPACTION || CONFIG_CMA */
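
[ A sketch of how a range caller consumes the new return value; the real
  user is the migrate loop behind alloc_contig_range() in mm/page_alloc.c,
  but everything below is a simplified, hypothetical model. ]

struct compact_control_model {
	unsigned long migrate_pfn;
};

/* One pass over [start_pfn, end_pfn): stop on any errno, otherwise
 * resume each round where the previous scan left off. */
static int migrate_range_model(struct compact_control_model *cc,
			       unsigned long start_pfn, unsigned long end_pfn,
			       int (*isolate)(struct compact_control_model *,
					      unsigned long, unsigned long))
{
	unsigned long pfn = start_pfn;

	while (pfn < end_pfn) {
		int ret = isolate(cc, pfn, end_pfn);

		if (ret)		/* -EAGAIN may be retried by the caller;
					 * -EINTR and -ENOMEM are fatal */
			return ret;
		pfn = cc->migrate_pfn;	/* continue where the scan stopped */
		/* ... hand cc->migratepages to migrate_pages() here ... */
	}
	return 0;
}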
@@ -1847,7 +1887,7 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
         */
        for (; block_end_pfn <= cc->free_pfn;
                        fast_find_block = false,
-                       low_pfn = block_end_pfn,
+                       cc->migrate_pfn = low_pfn = block_end_pfn,
                        block_start_pfn = block_end_pfn,
                        block_end_pfn += pageblock_nr_pages) {
 
@@ -1889,10 +1929,8 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
                }
 
                /* Perform the isolation */
-               low_pfn = isolate_migratepages_block(cc, low_pfn,
-                                               block_end_pfn, isolate_mode);
-
-               if (!low_pfn)
+               if (isolate_migratepages_block(cc, low_pfn, block_end_pfn,
+                                               isolate_mode))
                        return ISOLATE_ABORT;
 
                /*
@@ -1903,9 +1941,6 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
                break;
        }
 
-       /* Record where migration scanner will be restarted. */
-       cc->migrate_pfn = low_pfn;
-
        return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
 }
 
@@ -1977,8 +2012,8 @@ static unsigned int fragmentation_score_wmark(pg_data_t *pgdat, bool low)
        unsigned int wmark_low;
 
        /*
-        * Cap the low watermak to avoid excessive compaction
-        * activity in case a user sets the proactivess tunable
+        * Cap the low watermark to avoid excessive compaction
+        * activity in case a user sets the proactiveness tunable
         * close to 100 (maximum).
         */
        wmark_low = max(100U - sysctl_compaction_proactiveness, 5U);
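
[ Worked example of the cap, as stand-alone C mirroring the max() above;
  the default for sysctl_compaction_proactiveness is 20 upstream, but the
  numbers here are only illustrative. ]

#include <stdio.h>

static unsigned int wmark_low_model(unsigned int proactiveness)
{
	unsigned int v = 100U - proactiveness;

	return v > 5U ? v : 5U;		/* max(100 - proactiveness, 5) */
}

int main(void)
{
	printf("%u\n", wmark_low_model(20));	/* 80 */
	printf("%u\n", wmark_low_model(98));	/* 5, not 2: the cap bites */
	return 0;
}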
@@ -2319,7 +2354,8 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
        trace_mm_compaction_begin(start_pfn, cc->migrate_pfn,
                                cc->free_pfn, end_pfn, sync);
 
-       migrate_prep_local();
+       /* lru_add_drain_all() could be expensive since it involves other CPUs */
+       lru_add_drain();
 
        while ((ret = compact_finished(cc)) == COMPACT_CONTINUE) {
                int err;
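
[ The design choice spelled out as a toy model, with per-CPU state
  reduced to an array: lru_add_drain() touches only the local CPU's
  pending LRU batch, whereas lru_add_drain_all() has to schedule work on
  every online CPU and wait for it, which is why the cheaper call is
  enough at the start of a single compaction run. ]

#define NR_CPUS_MODEL 4

static int pending_model[NR_CPUS_MODEL];	/* pages in per-CPU LRU batches */

/* lru_add_drain(): flush only the current CPU's batch. */
static void lru_add_drain_model(int this_cpu)
{
	pending_model[this_cpu] = 0;
}

/* lru_add_drain_all(): flush every CPU's batch; in the real kernel the
 * cost is the cross-CPU work items, not the flush itself. */
static void lru_add_drain_all_model(void)
{
	for (int cpu = 0; cpu < NR_CPUS_MODEL; cpu++)
		pending_model[cpu] = 0;
}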
@@ -2494,6 +2530,14 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
         */
        WRITE_ONCE(current->capture_control, NULL);
        *capture = READ_ONCE(capc.page);
+       /*
+        * Technically, it is also possible that compaction is skipped but
+        * the page is still captured by luck (an IRQ came and freed the page).
+        * Returning COMPACT_SUCCESS in such cases helps to properly account
+        * COMPACT[STALL|FAIL] events when compaction is skipped.
+        */
+       if (*capture)
+               ret = COMPACT_SUCCESS;
 
        return ret;
 }
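
[ A compact model of the skipped-but-captured case; the COMPACT_* values
  and the capture slot are stubbed. An IRQ can free a suitable page into
  capc->page even though compact_zone() itself gave up, and reporting
  success keeps the COMPACTSTALL/COMPACTFAIL accounting consistent with
  the fact that the caller did get a page. ]

enum compact_result_model { MODEL_SKIPPED, MODEL_SUCCESS };

struct capture_control_model {
	void *page;	/* filled asynchronously when a matching page is freed */
};

static enum compact_result_model
finish_capture_model(struct capture_control_model *capc,
		     enum compact_result_model ret, void **capture)
{
	*capture = capc->page;
	/* Skipped but captured: a page was freed into the slot under us,
	 * so the request succeeded regardless of what compaction did. */
	if (*capture)
		ret = MODEL_SUCCESS;
	return ret;
}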
@@ -2657,9 +2701,6 @@ static void compact_nodes(void)
                compact_node(nid);
 }
 
-/* The written value is actually unused, all memory is compacted */
-int sysctl_compact_memory;
-
 /*
  * Tunable for proactive compaction. It determines how
  * aggressively the kernel should compact memory in the
@@ -2844,7 +2885,7 @@ void wakeup_kcompactd(pg_data_t *pgdat, int order, int highest_zoneidx)
  */
 static int kcompactd(void *p)
 {
-       pg_data_t *pgdat = (pg_data_t*)p;
+       pg_data_t *pgdat = (pg_data_t *)p;
        struct task_struct *tsk = current;
        unsigned int proactive_defer = 0;