Merge branch 'etnaviv/fixes' of https://git.pengutronix.de/git/lst/linux into drm...
[linux-2.6-microblaze.git] / mm / page-writeback.c
index 84ae9bf..3f690ba 100644 (file)
@@ -2097,34 +2097,25 @@ void __init page_writeback_init(void)
  * dirty pages in the file (thus it is important for this function to be quick
  * so that it can tag pages faster than a dirtying process can create them).
  */
-/*
- * We tag pages in batches of WRITEBACK_TAG_BATCH to reduce the i_pages lock
- * latency.
- */
 void tag_pages_for_writeback(struct address_space *mapping,
                             pgoff_t start, pgoff_t end)
 {
-#define WRITEBACK_TAG_BATCH 4096
-       unsigned long tagged = 0;
-       struct radix_tree_iter iter;
-       void **slot;
+       XA_STATE(xas, &mapping->i_pages, start);
+       unsigned int tagged = 0;        /* entries tagged TOWRITE so far */
+       void *page;                     /* loop cursor only; not read in the body */
 
-       xa_lock_irq(&mapping->i_pages);
-       radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, start,
-                                                       PAGECACHE_TAG_DIRTY) {
-               if (iter.index > end)
-                       break;
-               radix_tree_iter_tag_set(&mapping->i_pages, &iter,
-                                                       PAGECACHE_TAG_TOWRITE);
-               tagged++;
-               if ((tagged % WRITEBACK_TAG_BATCH) != 0)
+       xas_lock_irq(&xas);
+       xas_for_each_marked(&xas, page, end, PAGECACHE_TAG_DIRTY) {
+               xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE);
+               if (++tagged % XA_CHECK_SCHED)
                        continue;
-               slot = radix_tree_iter_resume(slot, &iter);
-               xa_unlock_irq(&mapping->i_pages);
+               /* Batch done: pause the walk and drop the lock to reschedule. */
+               xas_pause(&xas);
+               xas_unlock_irq(&xas);
                cond_resched();
-               xa_lock_irq(&mapping->i_pages);
+               xas_lock_irq(&xas);
        }
-       xa_unlock_irq(&mapping->i_pages);
+       xas_unlock_irq(&xas);
 }
 EXPORT_SYMBOL(tag_pages_for_writeback);
 
@@ -2149,6 +2140,13 @@ EXPORT_SYMBOL(tag_pages_for_writeback);
  * not miss some pages (e.g., because some other process has cleared TOWRITE
  * tag we set). The rule we follow is that TOWRITE tag can be cleared only
  * by the process clearing the DIRTY tag (and submitting the page for IO).
+ *
+ * To avoid deadlocks between range_cyclic writeback and callers that hold
+ * pages in PageWriteback to aggregate IO until write_cache_pages() returns,
+ * we do not loop back to the start of the file. Doing so causes a page
+ * lock/page writeback access order inversion - we should only ever lock
+ * multiple pages in ascending page->index order, and looping back to the start
+ * of the file violates that rule and causes deadlocks.
  */
 int write_cache_pages(struct address_space *mapping,
                      struct writeback_control *wbc, writepage_t writepage,
@@ -2162,31 +2160,24 @@ int write_cache_pages(struct address_space *mapping,
        pgoff_t index;
        pgoff_t end;            /* Inclusive */
        pgoff_t done_index;
-       int cycled;
        int range_whole = 0;
-       int tag;
+       xa_mark_t tag;
 
        pagevec_init(&pvec);
        if (wbc->range_cyclic) {
                writeback_index = mapping->writeback_index; /* prev offset */
                index = writeback_index;
-               if (index == 0)
-                       cycled = 1;
-               else
-                       cycled = 0;
                end = -1;
        } else {
                index = wbc->range_start >> PAGE_SHIFT;
                end = wbc->range_end >> PAGE_SHIFT;
                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
                        range_whole = 1;
-               cycled = 1; /* ignore range_cyclic tests */
        }
        if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
                tag = PAGECACHE_TAG_TOWRITE;
        else
                tag = PAGECACHE_TAG_DIRTY;
-retry:
        if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
                tag_pages_for_writeback(mapping, index, end);
        done_index = index;
@@ -2272,17 +2263,14 @@ continue_unlock:
                pagevec_release(&pvec);
                cond_resched();
        }
-       if (!cycled && !done) {
-               /*
-                * range_cyclic:
-                * We hit the last page and there is more work to be done: wrap
-                * back to the start of the file
-                */
-               cycled = 1;
-               index = 0;
-               end = writeback_index - 1;
-               goto retry;
-       }
+
+       /*
+        * If we hit the last page and there is more work to be done, wrap
+        * the index back to the start of the file for the next time we
+        * are called.
+        */
+       if (wbc->range_cyclic && !done)
+               done_index = 0;
        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                mapping->writeback_index = done_index;
 
@@ -2445,7 +2433,7 @@ void account_page_cleaned(struct page *page, struct address_space *mapping,
 
 /*
  * For address_spaces which do not use buffers.  Just tag the page as dirty in
- * its radix tree.
+ * the xarray.
  *
  * This is also used when a single buffer is being dirtied: we want to set the
  * page dirty in that case, but not all the buffers.  This is a "bottom-up"
@@ -2471,7 +2459,7 @@ int __set_page_dirty_nobuffers(struct page *page)
                BUG_ON(page_mapping(page) != mapping);
                WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
                account_page_dirtied(page, mapping);
-               radix_tree_tag_set(&mapping->i_pages, page_index(page),
+               __xa_set_mark(&mapping->i_pages, page_index(page),
                                   PAGECACHE_TAG_DIRTY);
                xa_unlock_irqrestore(&mapping->i_pages, flags);
                unlock_page_memcg(page);
@@ -2634,13 +2622,13 @@ EXPORT_SYMBOL(__cancel_dirty_page);
  * Returns true if the page was previously dirty.
  *
  * This is for preparing to put the page under writeout.  We leave the page
- * tagged as dirty in the radix tree so that a concurrent write-for-sync
+ * tagged as dirty in the xarray so that a concurrent write-for-sync
  * can discover it via a PAGECACHE_TAG_DIRTY walk.  The ->writepage
  * implementation will run either set_page_writeback() or set_page_dirty(),
- * at which stage we bring the page's dirty flag and radix-tree dirty tag
+ * at which stage we bring the page's dirty flag and xarray dirty tag
  * back into sync.
  *
- * This incoherency between the page's dirty flag and radix-tree tag is
+ * This incoherency between the page's dirty flag and xarray tag is
  * unfortunate, but it only exists while the page is locked.
  */
 int clear_page_dirty_for_io(struct page *page)
@@ -2721,7 +2709,7 @@ int test_clear_page_writeback(struct page *page)
                xa_lock_irqsave(&mapping->i_pages, flags);
                ret = TestClearPageWriteback(page);
                if (ret) {
-                       radix_tree_tag_clear(&mapping->i_pages, page_index(page),
+                       __xa_clear_mark(&mapping->i_pages, page_index(page),
                                                PAGECACHE_TAG_WRITEBACK);
                        if (bdi_cap_account_writeback(bdi)) {
                                struct bdi_writeback *wb = inode_to_wb(inode);
@@ -2761,11 +2749,13 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 
        lock_page_memcg(page);
        if (mapping && mapping_use_writeback_tags(mapping)) {
+               XA_STATE(xas, &mapping->i_pages, page_index(page));
                struct inode *inode = mapping->host;
                struct backing_dev_info *bdi = inode_to_bdi(inode);
                unsigned long flags;
 
-               xa_lock_irqsave(&mapping->i_pages, flags);
+               xas_lock_irqsave(&xas, flags);
+               xas_load(&xas);
                ret = TestSetPageWriteback(page);
                if (!ret) {
                        bool on_wblist;
@@ -2773,8 +2763,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
                        on_wblist = mapping_tagged(mapping,
                                                   PAGECACHE_TAG_WRITEBACK);
 
-                       radix_tree_tag_set(&mapping->i_pages, page_index(page),
-                                               PAGECACHE_TAG_WRITEBACK);
+                       xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
                        if (bdi_cap_account_writeback(bdi))
                                inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
 
@@ -2787,12 +2776,10 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
                                sb_mark_inode_writeback(mapping->host);
                }
                if (!PageDirty(page))
-                       radix_tree_tag_clear(&mapping->i_pages, page_index(page),
-                                               PAGECACHE_TAG_DIRTY);
+                       xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
                if (!keep_write)
-                       radix_tree_tag_clear(&mapping->i_pages, page_index(page),
-                                               PAGECACHE_TAG_TOWRITE);
-               xa_unlock_irqrestore(&mapping->i_pages, flags);
+                       xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
+               xas_unlock_irqrestore(&xas, flags);
        } else {
                ret = TestSetPageWriteback(page);
        }
@@ -2806,16 +2793,6 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 }
 EXPORT_SYMBOL(__test_set_page_writeback);
 
-/*
- * Return true if any of the pages in the mapping are marked with the
- * passed tag.
- */
-int mapping_tagged(struct address_space *mapping, int tag)
-{
-       return radix_tree_tagged(&mapping->i_pages, tag);
-}
-EXPORT_SYMBOL(mapping_tagged);
-
 /**
  * wait_for_stable_page() - wait for writeback to finish, if necessary.
  * @page:      The page to wait on.