hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization
diff --git a/mm/migrate.c b/mm/migrate.c
index 462163f..5d1839a 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -374,55 +374,21 @@ unlock:
 }
 #endif
 
-#ifdef CONFIG_BLOCK
-/* Returns true if all buffers are successfully locked */
-static bool buffer_migrate_lock_buffers(struct buffer_head *head,
-                                                       enum migrate_mode mode)
+static int expected_page_refs(struct page *page)
 {
-       struct buffer_head *bh = head;
+       int expected_count = 1;
 
-       /* Simple case, sync compaction */
-       if (mode != MIGRATE_ASYNC) {
-               do {
-                       get_bh(bh);
-                       lock_buffer(bh);
-                       bh = bh->b_this_page;
-
-               } while (bh != head);
-
-               return true;
-       }
-
-       /* async case, we cannot block on lock_buffer so use trylock_buffer */
-       do {
-               get_bh(bh);
-               if (!trylock_buffer(bh)) {
-                       /*
-                        * We failed to lock the buffer and cannot stall in
-                        * async migration. Release the taken locks
-                        */
-                       struct buffer_head *failed_bh = bh;
-                       put_bh(failed_bh);
-                       bh = head;
-                       while (bh != failed_bh) {
-                               unlock_buffer(bh);
-                               put_bh(bh);
-                               bh = bh->b_this_page;
-                       }
-                       return false;
-               }
+       /*
+        * Device public or private pages have an extra refcount as they are
+        * ZONE_DEVICE pages.
+        */
+       expected_count += is_device_private_page(page);
+       expected_count += is_device_public_page(page);
+       if (page_mapping(page))
+               expected_count += hpage_nr_pages(page) + page_has_private(page);
 
-               bh = bh->b_this_page;
-       } while (bh != head);
-       return true;
+       return expected_count;
 }
-#else
-static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
-                                                       enum migrate_mode mode)
-{
-       return true;
-}
-#endif /* CONFIG_BLOCK */
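
A quick sanity check of the arithmetic: the refcount budget computed by
expected_page_refs() above, redone as a self-contained userspace sketch.
The struct and helper names are invented stand-ins for the kernel's; only
the arithmetic matches.

	#include <stdbool.h>
	#include <stdio.h>

	struct sketch_page {
		bool device_private;	/* is_device_private_page() */
		bool device_public;	/* is_device_public_page() */
		bool has_mapping;	/* page_mapping() != NULL */
		bool has_private;	/* page_has_private(), e.g. buffer heads */
		int  nr_pages;		/* hpage_nr_pages(): 1, or 512 for a 2MB THP */
	};

	static int sketch_expected_refs(const struct sketch_page *p)
	{
		int expected_count = 1;			/* the migration path's own ref */

		expected_count += p->device_private;	/* extra ZONE_DEVICE ref */
		expected_count += p->device_public;
		if (p->has_mapping)
			expected_count += p->nr_pages + p->has_private;
		return expected_count;
	}

	int main(void)
	{
		/* An order-0 page-cache page with buffer heads attached: */
		struct sketch_page p = { .has_mapping = true,
					 .has_private = true, .nr_pages = 1 };

		printf("%d\n", sketch_expected_refs(&p));	/* prints 3 */
		return 0;
	}

This reproduces the "3 for pages with a mapping and PagePrivate/PagePrivate2
set" rule quoted in the comment below.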
 
 /*
  * Replace the page in the mapping.
@@ -433,21 +399,13 @@ static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
  * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
  */
 int migrate_page_move_mapping(struct address_space *mapping,
-               struct page *newpage, struct page *page,
-               struct buffer_head *head, enum migrate_mode mode,
+               struct page *newpage, struct page *page, enum migrate_mode mode,
                int extra_count)
 {
        XA_STATE(xas, &mapping->i_pages, page_index(page));
        struct zone *oldzone, *newzone;
        int dirty;
-       int expected_count = 1 + extra_count;
-
-       /*
-        * Device public or private pages have an extra refcount as they are
-        * ZONE_DEVICE pages.
-        */
-       expected_count += is_device_private_page(page);
-       expected_count += is_device_public_page(page);
+       int expected_count = expected_page_refs(page) + extra_count;
 
        if (!mapping) {
                /* Anonymous page without mapping */
@@ -467,8 +425,6 @@ int migrate_page_move_mapping(struct address_space *mapping,
        newzone = page_zone(newpage);
 
        xas_lock_irq(&xas);
-
-       expected_count += hpage_nr_pages(page) + page_has_private(page);
        if (page_count(page) != expected_count || xas_load(&xas) != page) {
                xas_unlock_irq(&xas);
                return -EAGAIN;
@@ -479,20 +435,6 @@ int migrate_page_move_mapping(struct address_space *mapping,
                return -EAGAIN;
        }
 
-       /*
-        * In the async migration case of moving a page with buffers, lock the
-        * buffers using trylock before the mapping is moved. If the mapping
-        * was moved, we later failed to lock the buffers and could not move
-        * the mapping back due to an elevated page count, we would have to
-        * block waiting on other references to be dropped.
-        */
-       if (mode == MIGRATE_ASYNC && head &&
-                       !buffer_migrate_lock_buffers(head, mode)) {
-               page_ref_unfreeze(page, expected_count);
-               xas_unlock_irq(&xas);
-               return -EAGAIN;
-       }
-
        /*
         * Now we know that no one else is looking at the page:
         * no turning back from here.
@@ -744,7 +686,7 @@ int migrate_page(struct address_space *mapping,
 
        BUG_ON(PageWriteback(page));    /* Writeback must be complete */
 
-       rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
+       rc = migrate_page_move_mapping(mapping, newpage, page, mode, 0);
 
        if (rc != MIGRATEPAGE_SUCCESS)
                return rc;
@@ -758,34 +700,98 @@ int migrate_page(struct address_space *mapping,
 EXPORT_SYMBOL(migrate_page);
 
 #ifdef CONFIG_BLOCK
-/*
- * Migration function for pages with buffers. This function can only be used
- * if the underlying filesystem guarantees that no other references to "page"
- * exist.
- */
-int buffer_migrate_page(struct address_space *mapping,
-               struct page *newpage, struct page *page, enum migrate_mode mode)
+/* Returns true if all buffers are successfully locked */
+static bool buffer_migrate_lock_buffers(struct buffer_head *head,
+                                                       enum migrate_mode mode)
+{
+       struct buffer_head *bh = head;
+
+       /* Simple case, sync compaction */
+       if (mode != MIGRATE_ASYNC) {
+               do {
+                       get_bh(bh);
+                       lock_buffer(bh);
+                       bh = bh->b_this_page;
+
+               } while (bh != head);
+
+               return true;
+       }
+
+       /* async case, we cannot block on lock_buffer so use trylock_buffer */
+       do {
+               get_bh(bh);
+               if (!trylock_buffer(bh)) {
+                       /*
+                        * We failed to lock the buffer and cannot stall in
+                        * async migration. Release the taken locks
+                        */
+                       struct buffer_head *failed_bh = bh;
+                       put_bh(failed_bh);
+                       bh = head;
+                       while (bh != failed_bh) {
+                               unlock_buffer(bh);
+                               put_bh(bh);
+                               bh = bh->b_this_page;
+                       }
+                       return false;
+               }
+
+               bh = bh->b_this_page;
+       } while (bh != head);
+       return true;
+}
+
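
The MIGRATE_ASYNC path above is an all-or-nothing trylock over the circular
b_this_page ring: the first buffer that cannot be locked triggers a rollback
of every lock taken so far. The same pattern as self-contained userspace C,
with pthread mutexes standing in for buffer locks (all names illustrative;
the kernel version also juggles get_bh()/put_bh() references, elided here):

	#include <pthread.h>
	#include <stdbool.h>

	/* Minimal stand-in for a buffer head on its circular per-page list. */
	struct sketch_bh {
		pthread_mutex_t lock;
		struct sketch_bh *next;		/* plays the role of b_this_page */
	};

	/* Lock every buffer on the ring, or lock none at all. */
	static bool lock_all_or_none(struct sketch_bh *head)
	{
		struct sketch_bh *bh = head;

		do {
			if (pthread_mutex_trylock(&bh->lock)) {
				/* Contended: unlock everything taken so far. */
				struct sketch_bh *cur = head;

				while (cur != bh) {
					pthread_mutex_unlock(&cur->lock);
					cur = cur->next;
				}
				return false;
			}
			bh = bh->next;
		} while (bh != head);

		return true;
	}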
+static int __buffer_migrate_page(struct address_space *mapping,
+               struct page *newpage, struct page *page, enum migrate_mode mode,
+               bool check_refs)
 {
        struct buffer_head *bh, *head;
        int rc;
+       int expected_count;
 
        if (!page_has_buffers(page))
                return migrate_page(mapping, newpage, page, mode);
 
+       /* Check that the page has no extra refs before we do more work */
+       expected_count = expected_page_refs(page);
+       if (page_count(page) != expected_count)
+               return -EAGAIN;
+
        head = page_buffers(page);
+       if (!buffer_migrate_lock_buffers(head, mode))
+               return -EAGAIN;
 
-       rc = migrate_page_move_mapping(mapping, newpage, page, head, mode, 0);
+       if (check_refs) {
+               bool busy;
+               bool invalidated = false;
 
-       if (rc != MIGRATEPAGE_SUCCESS)
-               return rc;
+recheck_buffers:
+               busy = false;
+               spin_lock(&mapping->private_lock);
+               bh = head;
+               do {
+                       if (atomic_read(&bh->b_count)) {
+                               busy = true;
+                               break;
+                       }
+                       bh = bh->b_this_page;
+               } while (bh != head);
+               spin_unlock(&mapping->private_lock);
+               if (busy) {
+                       if (invalidated) {
+                               rc = -EAGAIN;
+                               goto unlock_buffers;
+                       }
+                       invalidate_bh_lrus();
+                       invalidated = true;
+                       goto recheck_buffers;
+               }
+       }
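
The check_refs block above is an optimistic retry-once scheme: a nonzero
b_count is often only a stale reference parked in the per-CPU buffer-head
LRUs, so one invalidate_bh_lrus() pass is attempted before the migration is
failed. Its control flow reduces to roughly this sketch (invented names,
stubbed primitives; the stubs mimic the common "stale LRU reference" case):

	#include <stdbool.h>

	static int probes;
	/* First probe sees a stale per-CPU ref, the post-flush probe does not. */
	static bool any_buffer_referenced(void) { return probes++ == 0; }
	static void drop_percpu_bh_caches(void) { }	/* invalidate_bh_lrus() role */

	/* Retry-once: a single LRU flush is allowed before giving up. */
	static int ensure_buffers_unreferenced(void)
	{
		bool invalidated = false;

		while (any_buffer_referenced()) {
			if (invalidated)
				return -1;		/* maps to -EAGAIN above */
			drop_percpu_bh_caches();
			invalidated = true;
		}
		return 0;
	}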
 
-       /*
-        * In the async case, migrate_page_move_mapping locked the buffers
-        * with an IRQ-safe spinlock held. In the sync case, the buffers
-        * need to be locked now
-        */
-       if (mode != MIGRATE_ASYNC)
-               BUG_ON(!buffer_migrate_lock_buffers(head, mode));
+       rc = migrate_page_move_mapping(mapping, newpage, page, mode, 0);
+       if (rc != MIGRATEPAGE_SUCCESS)
+               goto unlock_buffers;
 
        ClearPagePrivate(page);
        set_page_private(newpage, page_private(page));
@@ -807,6 +813,8 @@ int buffer_migrate_page(struct address_space *mapping,
        else
                migrate_page_states(newpage, page);
 
+       rc = MIGRATEPAGE_SUCCESS;
+unlock_buffers:
        bh = head;
        do {
                unlock_buffer(bh);
@@ -815,9 +823,32 @@ int buffer_migrate_page(struct address_space *mapping,
 
        } while (bh != head);
 
-       return MIGRATEPAGE_SUCCESS;
+       return rc;
+}
+
+/*
+ * Migration function for pages with buffers. This function can only be used
+ * if the underlying filesystem guarantees that no other references to "page"
+ * exist. For example, attached buffer heads are accessed only under the page lock.
+ */
+int buffer_migrate_page(struct address_space *mapping,
+               struct page *newpage, struct page *page, enum migrate_mode mode)
+{
+       return __buffer_migrate_page(mapping, newpage, page, mode, false);
 }
 EXPORT_SYMBOL(buffer_migrate_page);
+
+/*
+ * Same as above except that this variant is more careful and checks that there
+ * are also no buffer head references. This function is the right one for
+ * mappings where buffer heads are directly looked up and referenced (such as
+ * block device mappings).
+ */
+int buffer_migrate_page_norefs(struct address_space *mapping,
+               struct page *newpage, struct page *page, enum migrate_mode mode)
+{
+       return __buffer_migrate_page(mapping, newpage, page, mode, true);
+}
 #endif
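
For context, a mapping opts in to the careful variant simply by pointing its
migratepage callback at buffer_migrate_page_norefs(). A sketch of what a
block-device-style user could look like; the aops instance is illustrative
and not part of this patch:

	static const struct address_space_operations sketch_blkdev_aops = {
		.readpage	= blkdev_readpage,
		.writepage	= blkdev_writepage,
		.migratepage	= buffer_migrate_page_norefs,	/* also checks b_count */
	};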
 
 /*
@@ -1293,8 +1324,19 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
                goto put_anon;
 
        if (page_mapped(hpage)) {
+               struct address_space *mapping = page_mapping(hpage);
+
+               /*
+                * try_to_unmap could potentially call huge_pmd_unshare.
+                * Because of this, take the semaphore in write mode here and
+                * set TTU_RMAP_LOCKED to let lower levels know we have
+                * taken the lock.
+                */
+               i_mmap_lock_write(mapping);
                try_to_unmap(hpage,
-                       TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+                       TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS|
+                       TTU_RMAP_LOCKED);
+               i_mmap_unlock_write(mapping);
                page_was_mapped = 1;
        }