diff --git a/mm/shmem.c b/mm/shmem.c
index 4469426..56bf122 100644
@@ -322,24 +322,20 @@ void shmem_uncharge(struct inode *inode, long pages)
 }
 
 /*
- * Replace item expected in radix tree by a new item, while holding tree lock.
+ * Replace item expected in xarray by a new item, while holding xa_lock.
  */
-static int shmem_radix_tree_replace(struct address_space *mapping,
+static int shmem_replace_entry(struct address_space *mapping,
                        pgoff_t index, void *expected, void *replacement)
 {
-       struct radix_tree_node *node;
-       void __rcu **pslot;
+       XA_STATE(xas, &mapping->i_pages, index);
        void *item;
 
        VM_BUG_ON(!expected);
        VM_BUG_ON(!replacement);
-       item = __radix_tree_lookup(&mapping->i_pages, index, &node, &pslot);
-       if (!item)
-               return -ENOENT;
+       item = xas_load(&xas);
        if (item != expected)
                return -ENOENT;
-       __radix_tree_replace(&mapping->i_pages, node, pslot,
-                            replacement, NULL);
+       xas_store(&xas, replacement);
        return 0;
 }
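
The new helper is the canonical XArray replace-under-lock shape: xas_load() peeks at the slot and xas_store() overwrites it; because the slot is already occupied, the store needs no node allocation and cannot fail. A minimal sketch of the same idiom, hypothetical names, assuming the caller holds the xa_lock:

    #include <linux/xarray.h>

    /* Sketch only: swap @old for @new at @index; caller holds xa_lock.
     * Storing over an occupied slot never allocates, so the only
     * failure mode is the mismatch check. */
    static int replace_entry_locked(struct xarray *xa, unsigned long index,
                                    void *old, void *new)
    {
            XA_STATE(xas, xa, index);

            if (xas_load(&xas) != old)
                    return -ENOENT;
            xas_store(&xas, new);
            return 0;
    }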
 
@@ -353,12 +349,7 @@ static int shmem_radix_tree_replace(struct address_space *mapping,
 static bool shmem_confirm_swap(struct address_space *mapping,
                               pgoff_t index, swp_entry_t swap)
 {
-       void *item;
-
-       rcu_read_lock();
-       item = radix_tree_lookup(&mapping->i_pages, index);
-       rcu_read_unlock();
-       return item == swp_to_radix_entry(swap);
+       return xa_load(&mapping->i_pages, index) == swp_to_radix_entry(swap);
 }
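
xa_load() takes and drops the RCU read lock internally, which is why the explicit rcu_read_lock()/rcu_read_unlock() pair around the lookup disappears. A hedged sketch of the same lockless peek, hypothetical names:

    #include <linux/xarray.h>

    /* Sketch: single-entry lockless test; xa_load() handles RCU itself. */
    static bool slot_holds(struct xarray *xa, unsigned long index, void *want)
    {
            return xa_load(xa, index) == want;
    }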
 
 /*
@@ -586,9 +577,11 @@ static inline bool is_huge_enabled(struct shmem_sb_info *sbinfo)
  */
 static int shmem_add_to_page_cache(struct page *page,
                                   struct address_space *mapping,
-                                  pgoff_t index, void *expected)
+                                  pgoff_t index, void *expected, gfp_t gfp)
 {
-       int error, nr = hpage_nr_pages(page);
+       XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page));
+       unsigned long i = 0;
+       unsigned long nr = 1UL << compound_order(page);
 
        VM_BUG_ON_PAGE(PageTail(page), page);
        VM_BUG_ON_PAGE(index != round_down(index, nr), page);
@@ -600,47 +593,39 @@ static int shmem_add_to_page_cache(struct page *page,
        page->mapping = mapping;
        page->index = index;
 
-       xa_lock_irq(&mapping->i_pages);
-       if (PageTransHuge(page)) {
-               void __rcu **results;
-               pgoff_t idx;
-               int i;
-
-               error = 0;
-               if (radix_tree_gang_lookup_slot(&mapping->i_pages,
-                                       &results, &idx, index, 1) &&
-                               idx < index + HPAGE_PMD_NR) {
-                       error = -EEXIST;
+       do {
+               void *entry;
+               xas_lock_irq(&xas);
+               entry = xas_find_conflict(&xas);
+               if (entry != expected)
+                       xas_set_err(&xas, -EEXIST);
+               xas_create_range(&xas);
+               if (xas_error(&xas))
+                       goto unlock;
+next:
+               xas_store(&xas, page + i);
+               if (++i < nr) {
+                       xas_next(&xas);
+                       goto next;
                }
-
-               if (!error) {
-                       for (i = 0; i < HPAGE_PMD_NR; i++) {
-                               error = radix_tree_insert(&mapping->i_pages,
-                                               index + i, page + i);
-                               VM_BUG_ON(error);
-                       }
+               if (PageTransHuge(page)) {
                        count_vm_event(THP_FILE_ALLOC);
+                       __inc_node_page_state(page, NR_SHMEM_THPS);
                }
-       } else if (!expected) {
-               error = radix_tree_insert(&mapping->i_pages, index, page);
-       } else {
-               error = shmem_radix_tree_replace(mapping, index, expected,
-                                                                page);
-       }
-
-       if (!error) {
                mapping->nrpages += nr;
-               if (PageTransHuge(page))
-                       __inc_node_page_state(page, NR_SHMEM_THPS);
                __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
                __mod_node_page_state(page_pgdat(page), NR_SHMEM, nr);
-               xa_unlock_irq(&mapping->i_pages);
-       } else {
+unlock:
+               xas_unlock_irq(&xas);
+       } while (xas_nomem(&xas, gfp));
+
+       if (xas_error(&xas)) {
                page->mapping = NULL;
-               xa_unlock_irq(&mapping->i_pages);
                page_ref_sub(page, nr);
+               return xas_error(&xas);
        }
-       return error;
+
+       return 0;
 }
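
The do/while above is the standard XArray store idiom: do the work under the lock, let an internal allocation failure surface through xas_error(), then let xas_nomem() allocate outside the lock with the caller's gfp and retry. XA_STATE_ORDER() seeds the state for the whole 2^order block of indices, and xas_create_range() builds the nodes for that range up front so the per-subpage xas_store()/xas_next() walk cannot fail midway. Stripped of the shmem accounting, the skeleton is the one documented in Documentation/core-api/xarray.rst; a sketch with hypothetical names:

    #include <linux/xarray.h>

    /* Sketch: store @entry at @index, allocating nodes with @gfp
     * outside the spinlock and retrying on -ENOMEM. */
    static int store_with_retry(struct xarray *xa, unsigned long index,
                                void *entry, gfp_t gfp)
    {
            XA_STATE(xas, xa, index);

            do {
                    xas_lock(&xas);
                    xas_store(&xas, entry);   /* may record -ENOMEM in xas */
                    xas_unlock(&xas);
            } while (xas_nomem(&xas, gfp));   /* preallocate, clear error, retry */

            return xas_error(&xas);
    }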
 
 /*
@@ -654,7 +639,7 @@ static void shmem_delete_from_page_cache(struct page *page, void *radswap)
        VM_BUG_ON_PAGE(PageCompound(page), page);
 
        xa_lock_irq(&mapping->i_pages);
-       error = shmem_radix_tree_replace(mapping, page->index, page, radswap);
+       error = shmem_replace_entry(mapping, page->index, page, radswap);
        page->mapping = NULL;
        mapping->nrpages--;
        __dec_node_page_state(page, NR_FILE_PAGES);
@@ -665,7 +650,7 @@ static void shmem_delete_from_page_cache(struct page *page, void *radswap)
 }
 
 /*
- * Remove swap entry from radix tree, free the swap and its page cache.
+ * Remove swap entry from page cache, free the swap and its page cache.
  */
 static int shmem_free_swap(struct address_space *mapping,
                           pgoff_t index, void *radswap)
@@ -673,7 +658,7 @@ static int shmem_free_swap(struct address_space *mapping,
        void *old;
 
        xa_lock_irq(&mapping->i_pages);
-       old = radix_tree_delete_item(&mapping->i_pages, index, radswap);
+       old = __xa_cmpxchg(&mapping->i_pages, index, radswap, NULL, 0);
        xa_unlock_irq(&mapping->i_pages);
        if (old != radswap)
                return -ENOENT;
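
radix_tree_delete_item() maps onto a compare-and-exchange with NULL: the double-underscore variant expects xa_lock to be held, and the gfp argument can be 0 because storing NULL never allocates. A sketch of the conditional-erase pattern, hypothetical names:

    /* Sketch: erase @index only if it still holds @expected;
     * caller holds xa_lock, and removal cannot allocate. */
    static int erase_if_expected(struct xarray *xa, unsigned long index,
                                 void *expected)
    {
            void *old = __xa_cmpxchg(xa, index, expected, NULL, 0);

            return old == expected ? 0 : -ENOENT;
    }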
@@ -691,29 +676,19 @@ static int shmem_free_swap(struct address_space *mapping,
 unsigned long shmem_partial_swap_usage(struct address_space *mapping,
                                                pgoff_t start, pgoff_t end)
 {
-       struct radix_tree_iter iter;
-       void __rcu **slot;
+       XA_STATE(xas, &mapping->i_pages, start);
        struct page *page;
        unsigned long swapped = 0;
 
        rcu_read_lock();
-
-       radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) {
-               if (iter.index >= end)
-                       break;
-
-               page = radix_tree_deref_slot(slot);
-
-               if (radix_tree_deref_retry(page)) {
-                       slot = radix_tree_iter_retry(&iter);
+       xas_for_each(&xas, page, end - 1) {
+               if (xas_retry(&xas, page))
                        continue;
-               }
-
-               if (radix_tree_exceptional_entry(page))
+               if (xa_is_value(page))
                        swapped++;
 
                if (need_resched()) {
-                       slot = radix_tree_iter_resume(slot, &iter);
+                       xas_pause(&xas);
                        cond_resched_rcu();
                }
        }
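
xas_for_each() visits only present entries up to an inclusive maximum (hence end - 1), and xas_pause() records a restart point so the RCU lock can be dropped mid-walk. A consolidated sketch of the pattern this hunk assembles, hypothetical names:

    #include <linux/sched.h>
    #include <linux/xarray.h>

    /* Sketch: count value entries in [start, end) under RCU,
     * yielding the CPU periodically. */
    static unsigned long count_values(struct xarray *xa,
                                      unsigned long start, unsigned long end)
    {
            XA_STATE(xas, xa, start);
            unsigned long count = 0;
            void *entry;

            rcu_read_lock();
            xas_for_each(&xas, entry, end - 1) {
                    if (xas_retry(&xas, entry))     /* raced with a modification */
                            continue;
                    if (xa_is_value(entry))
                            count++;
                    if (need_resched()) {
                            xas_pause(&xas);        /* safe point to resume from */
                            cond_resched_rcu();
                    }
            }
            rcu_read_unlock();
            return count;
    }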
@@ -788,7 +763,7 @@ void shmem_unlock_mapping(struct address_space *mapping)
 }
 
 /*
- * Remove range of pages and swap entries from radix tree, and free them.
+ * Remove range of pages and swap entries from page cache, and free them.
  * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
  */
 static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
@@ -824,7 +799,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                        if (index >= end)
                                break;
 
-                       if (radix_tree_exceptional_entry(page)) {
+                       if (xa_is_value(page)) {
                                if (unfalloc)
                                        continue;
                                nr_swaps_freed += !shmem_free_swap(mapping,
@@ -921,7 +896,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                        if (index >= end)
                                break;
 
-                       if (radix_tree_exceptional_entry(page)) {
+                       if (xa_is_value(page)) {
                                if (unfalloc)
                                        continue;
                                if (shmem_free_swap(mapping, index, page)) {
@@ -1110,34 +1085,27 @@ static void shmem_evict_inode(struct inode *inode)
        clear_inode(inode);
 }
 
-static unsigned long find_swap_entry(struct radix_tree_root *root, void *item)
+static unsigned long find_swap_entry(struct xarray *xa, void *item)
 {
-       struct radix_tree_iter iter;
-       void __rcu **slot;
-       unsigned long found = -1;
+       XA_STATE(xas, xa, 0);
        unsigned int checked = 0;
+       void *entry;
 
        rcu_read_lock();
-       radix_tree_for_each_slot(slot, root, &iter, 0) {
-               void *entry = radix_tree_deref_slot(slot);
-
-               if (radix_tree_deref_retry(entry)) {
-                       slot = radix_tree_iter_retry(&iter);
+       xas_for_each(&xas, entry, ULONG_MAX) {
+               if (xas_retry(&xas, entry))
                        continue;
-               }
-               if (entry == item) {
-                       found = iter.index;
+               if (entry == item)
                        break;
-               }
                checked++;
-               if ((checked % 4096) != 0)
+               if ((checked % XA_CHECK_SCHED) != 0)
                        continue;
-               slot = radix_tree_iter_resume(slot, &iter);
+               xas_pause(&xas);
                cond_resched_rcu();
        }
-
        rcu_read_unlock();
-       return found;
+
+       return entry ? xas.xa_index : -1;
 }
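
Two details here: XA_CHECK_SCHED (4096, from <linux/xarray.h>) replaces the open-coded 4096, and xas_for_each() terminates with entry == NULL once the range is exhausted, which is what makes "return entry ? xas.xa_index : -1;" correct. A sketch of that exit property, hypothetical names, retry and resched handling elided:

    /* Sketch: index of the first slot holding @item, or -1 (ULONG_MAX).
     * entry doubles as the found/not-found flag after the loop. */
    static unsigned long index_of(struct xarray *xa, void *item)
    {
            XA_STATE(xas, xa, 0);
            void *entry;

            rcu_read_lock();
            xas_for_each(&xas, entry, ULONG_MAX)
                    if (entry == item)
                            break;
            rcu_read_unlock();

            return entry ? xas.xa_index : -1;
    }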
 
 /*
@@ -1175,10 +1143,10 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
                 * We needed to drop mutex to make that restrictive page
                 * allocation, but the inode might have been freed while we
                 * dropped it: although a racing shmem_evict_inode() cannot
-                * complete without emptying the radix_tree, our page lock
+                * complete without emptying the page cache, our page lock
                 * on this swapcache page is not enough to prevent that -
                 * free_swap_and_cache() of our swap entry will only
-                * trylock_page(), removing swap from radix_tree whatever.
+                * trylock_page(), removing swap from page cache whatever.
                 *
                 * We must not proceed to shmem_add_to_page_cache() if the
                 * inode has been freed, but of course we cannot rely on
@@ -1200,7 +1168,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info,
         */
        if (!error)
                error = shmem_add_to_page_cache(*pagep, mapping, index,
-                                               radswap);
+                                               radswap, gfp);
        if (error != -ENOMEM) {
                /*
                 * Truncation and eviction use free_swap_and_cache(), which
@@ -1244,7 +1212,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page)
                                            &memcg, false);
        if (error)
                goto out;
-       /* No radix_tree_preload: swap entry keeps a place for page in tree */
+       /* No memory allocation: swap entry occupies the slot for the page */
        error = -EAGAIN;
 
        mutex_lock(&shmem_swaplist_mutex);
@@ -1453,23 +1421,17 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
                struct shmem_inode_info *info, pgoff_t index)
 {
        struct vm_area_struct pvma;
-       struct inode *inode = &info->vfs_inode;
-       struct address_space *mapping = inode->i_mapping;
-       pgoff_t idx, hindex;
-       void __rcu **results;
+       struct address_space *mapping = info->vfs_inode.i_mapping;
+       pgoff_t hindex;
        struct page *page;
 
        if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
                return NULL;
 
        hindex = round_down(index, HPAGE_PMD_NR);
-       rcu_read_lock();
-       if (radix_tree_gang_lookup_slot(&mapping->i_pages, &results, &idx,
-                               hindex, 1) && idx < hindex + HPAGE_PMD_NR) {
-               rcu_read_unlock();
+       if (xa_find(&mapping->i_pages, &hindex, hindex + HPAGE_PMD_NR - 1,
+                                                               XA_PRESENT))
                return NULL;
-       }
-       rcu_read_unlock();
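
xa_find() returns the first present entry in [*index, max] and handles the RCU locking itself, so the gang lookup plus the manual rcu_read_lock()/rcu_read_unlock() dance collapses into one call. A sketch of the same occupancy test over a naturally aligned block, hypothetical names:

    #include <linux/kernel.h>
    #include <linux/xarray.h>

    /* Sketch: is any slot in the @nr-aligned block around @index
     * occupied? xa_find() updates @start to the hit (unused here). */
    static bool range_occupied(struct xarray *xa, unsigned long index,
                               unsigned long nr)
    {
            unsigned long start = round_down(index, nr);

            return xa_find(xa, &start, start + nr - 1, XA_PRESENT) != NULL;
    }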
 
        shmem_pseudo_vma_init(&pvma, info, hindex);
        page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
@@ -1578,8 +1540,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
         * a nice clean interface for us to replace oldpage by newpage there.
         */
        xa_lock_irq(&swap_mapping->i_pages);
-       error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage,
-                                                                  newpage);
+       error = shmem_replace_entry(swap_mapping, swap_index, oldpage, newpage);
        if (!error) {
                __inc_node_page_state(newpage, NR_FILE_PAGES);
                __dec_node_page_state(oldpage, NR_FILE_PAGES);
@@ -1643,7 +1604,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 repeat:
        swap.val = 0;
        page = find_lock_entry(mapping, index);
-       if (radix_tree_exceptional_entry(page)) {
+       if (xa_is_value(page)) {
                swap = radix_to_swp_entry(page);
                page = NULL;
        }
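
shmem keeps swapped-out pages in the cache as value entries: the swap entry's scalar is tagged into the slot (swp_to_radix_entry() boils down to xa_mk_value()), and xa_is_value() tells it apart from a real page pointer. A standalone sketch of the round trip, independent of shmem:

    #include <linux/printk.h>
    #include <linux/xarray.h>

    /* Sketch: a value entry carries an unsigned long in a pointer slot. */
    static void value_entry_demo(struct xarray *xa)
    {
            void *entry = xa_mk_value(42);          /* tag 42 as a value entry */

            xa_store(xa, 0, entry, GFP_KERNEL);     /* error handling elided */
            entry = xa_load(xa, 0);
            if (xa_is_value(entry))
                    pr_info("stored value %lu\n", xa_to_value(entry));
    }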
@@ -1718,7 +1679,7 @@ repeat:
                                false);
                if (!error) {
                        error = shmem_add_to_page_cache(page, mapping, index,
-                                               swp_to_radix_entry(swap));
+                                               swp_to_radix_entry(swap), gfp);
                        /*
                         * We already confirmed swap under page lock, and make
                         * no memory allocation here, so usually no possibility
@@ -1824,13 +1785,8 @@ alloc_nohuge:            page = shmem_alloc_and_acct_page(gfp, inode,
                                PageTransHuge(page));
                if (error)
                        goto unacct;
-               error = radix_tree_maybe_preload_order(gfp & GFP_RECLAIM_MASK,
-                               compound_order(page));
-               if (!error) {
-                       error = shmem_add_to_page_cache(page, mapping, hindex,
-                                                       NULL);
-                       radix_tree_preload_end();
-               }
+               error = shmem_add_to_page_cache(page, mapping, hindex,
+                                               NULL, gfp & GFP_RECLAIM_MASK);
                if (error) {
                        mem_cgroup_cancel_charge(page, memcg,
                                        PageTransHuge(page));
@@ -1931,7 +1887,7 @@ unlock:
                spin_unlock_irq(&info->lock);
                goto repeat;
        }
-       if (error == -EEXIST)   /* from above or from radix_tree_insert */
+       if (error == -EEXIST)
                goto repeat;
        return error;
 }
@@ -2299,11 +2255,8 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
        if (ret)
                goto out_release;
 
-       ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
-       if (!ret) {
-               ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL);
-               radix_tree_preload_end();
-       }
+       ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL,
+                                               gfp & GFP_RECLAIM_MASK);
        if (ret)
                goto out_release_uncharge;
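
Both call sites lose the preload/preload_end bracketing: the XArray allocates inside shmem_add_to_page_cache() via xas_nomem(), so callers only pass a sanitized gfp down, with GFP_RECLAIM_MASK stripping everything but the reclaim-behaviour bits. A sketch of the resulting call shape, hypothetical names, assuming GFP_RECLAIM_MASK is in scope (it lives in mm/internal.h):

    #include <linux/xarray.h>
    #include "internal.h"   /* GFP_RECLAIM_MASK, assuming an mm/ file */

    /* Sketch: the store path allocates internally now; the caller
     * just masks the gfp it forwards. */
    static int insert_page(struct xarray *xa, unsigned long index,
                           struct page *page, gfp_t gfp)
    {
            void *old = xa_store(xa, index, page, gfp & GFP_RECLAIM_MASK);

            return xa_err(old);     /* 0 on success, else e.g. -ENOMEM */
    }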
 
@@ -2548,7 +2501,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 }
 
 /*
- * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
+ * llseek SEEK_DATA or SEEK_HOLE through the page cache.
  */
 static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
                                    pgoff_t index, pgoff_t end, int whence)
@@ -2578,7 +2531,7 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
                                index = indices[i];
                        }
                        page = pvec.pages[i];
-                       if (page && !radix_tree_exceptional_entry(page)) {
+                       if (page && !xa_is_value(page)) {
                                if (!PageUptodate(page))
                                        page = NULL;
                        }