diff --git a/fs/dax.c b/fs/dax.c
index aa86d9f..6411928 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -351,6 +351,19 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping,
        }
 }
 
+static struct page *dax_busy_page(void *entry)
+{
+       unsigned long pfn;
+
+       for_each_mapped_pfn(entry, pfn) {
+               struct page *page = pfn_to_page(pfn);
+
+               if (page_ref_count(page) > 1)
+                       return page;
+       }
+       return NULL;
+}
+
 /*
  * Find radix tree entry at given index. If it points to an exceptional entry,
  * return it with the radix tree entry locked. If the radix tree doesn't
@@ -492,6 +505,90 @@ restart:
        return entry;
 }
 
+/**
+ * dax_layout_busy_page - find first pinned page in @mapping
+ * @mapping: address space to scan for a page with ref count > 1
+ *
+ * DAX requires ZONE_DEVICE mapped pages. These pages are never
+ * 'onlined' to the page allocator so they are considered idle when
+ * page->count == 1. A filesystem uses this interface to determine if
+ * any page in the mapping is busy, i.e. for DMA, or other
+ * get_user_pages() usages.
+ *
+ * It is expected that the filesystem is holding locks to block the
+ * establishment of new mappings in this address_space. I.e. it expects
+ * to be able to run unmap_mapping_range() and subsequently not race
+ * mapping_mapped() becoming true.
+ */
+struct page *dax_layout_busy_page(struct address_space *mapping)
+{
+       pgoff_t indices[PAGEVEC_SIZE];
+       struct page *page = NULL;
+       struct pagevec pvec;
+       pgoff_t index, end;
+       unsigned i;
+
+       /*
+        * In the 'limited' case get_user_pages() for dax is disabled.
+        */
+       if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
+               return NULL;
+
+       if (!dax_mapping(mapping) || !mapping_mapped(mapping))
+               return NULL;
+
+       pagevec_init(&pvec);
+       index = 0;
+       end = -1;
+
+       /*
+        * If we race get_user_pages_fast() here either we'll see the
+        * elevated page count in the pagevec_lookup and wait, or
+        * get_user_pages_fast() will see that the page it took a reference
+        * against is no longer mapped in the page tables and bail to the
+        * get_user_pages() slow path.  The slow path is protected by
+        * pte_lock() and pmd_lock(). New references are not taken without
+        * holding those locks, and unmap_mapping_range() will not zero the
+        * pte or pmd without holding the respective lock, so we are
+        * guaranteed to either see new references or prevent new
+        * references from being established.
+        */
+       unmap_mapping_range(mapping, 0, 0, 1);
+
+       while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
+                               min(end - index, (pgoff_t)PAGEVEC_SIZE),
+                               indices)) {
+               for (i = 0; i < pagevec_count(&pvec); i++) {
+                       struct page *pvec_ent = pvec.pages[i];
+                       void *entry;
+
+                       index = indices[i];
+                       if (index >= end)
+                               break;
+
+                       if (!radix_tree_exceptional_entry(pvec_ent))
+                               continue;
+
+                       xa_lock_irq(&mapping->i_pages);
+                       entry = get_unlocked_mapping_entry(mapping, index, NULL);
+                       if (entry)
+                               page = dax_busy_page(entry);
+                       put_unlocked_mapping_entry(mapping, index, entry);
+                       xa_unlock_irq(&mapping->i_pages);
+                       if (page)
+                               break;
+               }
+               pagevec_remove_exceptionals(&pvec);
+               pagevec_release(&pvec);
+               index++;
+
+               if (page)
+                       break;
+       }
+       return page;
+}
+EXPORT_SYMBOL_GPL(dax_layout_busy_page);
+
 static int __dax_invalidate_mapping_entry(struct address_space *mapping,
                                          pgoff_t index, bool trunc)
 {
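
Review note, not part of the patch: the kernel-doc above states the caller's obligations but not how the result is consumed. Below is a minimal filesystem-side sketch of how dax_layout_busy_page() might be used before a truncate or hole punch. The helper name is hypothetical, the caller is assumed to already hold the locks that block new faults (as the comment requires), and the wake-up on &page->_refcount is assumed to come from the device page-free path.

	static int fs_break_dax_layouts(struct inode *inode)
	{
		struct page *page;

		page = dax_layout_busy_page(inode->i_mapping);
		if (!page)
			return 0;

		/* Sleep until the get_user_pages()/DMA user drops its reference. */
		return ___wait_var_event(&page->_refcount,
				atomic_read(&page->_refcount) == 1,
				TASK_INTERRUPTIBLE, 0, 0, schedule());
	}

Real callers would typically loop on such a helper until it returns 0, dropping and retaking their locks around the sleep rather than holding them across schedule().
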
@@ -905,14 +1002,13 @@ out:
  * If this page is ever written to we will re-fault and change the mapping to
  * point to real DAX storage instead.
  */
-static int dax_load_hole(struct address_space *mapping, void *entry,
+static vm_fault_t dax_load_hole(struct address_space *mapping, void *entry,
                         struct vm_fault *vmf)
 {
        struct inode *inode = mapping->host;
        unsigned long vaddr = vmf->address;
-       int ret = VM_FAULT_NOPAGE;
+       vm_fault_t ret = VM_FAULT_NOPAGE;
        struct page *zero_page;
-       void *entry2;
        pfn_t pfn;
 
        zero_page = ZERO_PAGE(0);
@@ -922,14 +1018,9 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
        }
 
        pfn = page_to_pfn_t(zero_page);
-       entry2 = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
-                       RADIX_DAX_ZERO_PAGE, false);
-       if (IS_ERR(entry2)) {
-               ret = VM_FAULT_SIGBUS;
-               goto out;
-       }
-
-       vm_insert_mixed(vmf->vma, vaddr, pfn);
+       dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_ZERO_PAGE,
+                       false);
+       ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
 out:
        trace_dax_load_hole(inode, vmf, ret);
        return ret;
@@ -991,6 +1082,7 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
        struct iov_iter *iter = data;
        loff_t end = pos + length, done = 0;
        ssize_t ret = 0;
+       size_t xfer;
        int id;
 
        if (iov_iter_rw(iter) == READ) {
@@ -1054,18 +1146,20 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
                 * vfs_write(), depending on which operation we are doing.
                 */
                if (iov_iter_rw(iter) == WRITE)
-                       map_len = dax_copy_from_iter(dax_dev, pgoff, kaddr,
+                       xfer = dax_copy_from_iter(dax_dev, pgoff, kaddr,
                                        map_len, iter);
                else
-                       map_len = copy_to_iter(kaddr, map_len, iter);
-               if (map_len <= 0) {
-                       ret = map_len ? map_len : -EFAULT;
-                       break;
-               }
+                       xfer = dax_copy_to_iter(dax_dev, pgoff, kaddr,
+                                       map_len, iter);
+
+               pos += xfer;
+               length -= xfer;
+               done += xfer;
 
-               pos += map_len;
-               length -= map_len;
-               done += map_len;
+               if (xfer == 0)
+                       ret = -EFAULT;
+               if (xfer < map_len)
+                       break;
        }
        dax_read_unlock(id);
 
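
Note on the accounting change above: progress from a short dax_copy_from_iter()/dax_copy_to_iter() is now credited to pos/length/done before the error check, so only a copy that makes no progress at all is turned into -EFAULT. Assuming the unchanged tail of dax_iomap_actor() (outside this hunk) keeps the usual actor convention, partial progress still wins over the error, roughly:

	/* hedged sketch of the actor's unchanged return path, not shown in this hunk */
	return done ? done : ret;	/* bytes completed if any, else the error */
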
@@ -1112,7 +1206,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 }
 EXPORT_SYMBOL_GPL(dax_iomap_rw);
 
-static int dax_fault_return(int error)
+static vm_fault_t dax_fault_return(int error)
 {
        if (error == 0)
                return VM_FAULT_NOPAGE;
@@ -1132,7 +1226,7 @@ static bool dax_fault_is_synchronous(unsigned long flags,
                && (iomap->flags & IOMAP_F_DIRTY);
 }
 
-static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
+static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
                               int *iomap_errp, const struct iomap_ops *ops)
 {
        struct vm_area_struct *vma = vmf->vma;
@@ -1145,18 +1239,18 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
        int error, major = 0;
        bool write = vmf->flags & FAULT_FLAG_WRITE;
        bool sync;
-       int vmf_ret = 0;
+       vm_fault_t ret = 0;
        void *entry;
        pfn_t pfn;
 
-       trace_dax_pte_fault(inode, vmf, vmf_ret);
+       trace_dax_pte_fault(inode, vmf, ret);
        /*
         * Check whether offset isn't beyond end of file now. Caller is supposed
         * to hold locks serializing us with truncate / punch hole so this is
         * a reliable test.
         */
        if (pos >= i_size_read(inode)) {
-               vmf_ret = VM_FAULT_SIGBUS;
+               ret = VM_FAULT_SIGBUS;
                goto out;
        }
 
@@ -1165,7 +1259,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 
        entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
        if (IS_ERR(entry)) {
-               vmf_ret = dax_fault_return(PTR_ERR(entry));
+               ret = dax_fault_return(PTR_ERR(entry));
                goto out;
        }
 
@@ -1176,7 +1270,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
         * retried.
         */
        if (pmd_trans_huge(*vmf->pmd) || pmd_devmap(*vmf->pmd)) {
-               vmf_ret = VM_FAULT_NOPAGE;
+               ret = VM_FAULT_NOPAGE;
                goto unlock_entry;
        }
 
@@ -1189,7 +1283,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
        if (iomap_errp)
                *iomap_errp = error;
        if (error) {
-               vmf_ret = dax_fault_return(error);
+               ret = dax_fault_return(error);
                goto unlock_entry;
        }
        if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
@@ -1219,9 +1313,9 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
                        goto error_finish_iomap;
 
                __SetPageUptodate(vmf->cow_page);
-               vmf_ret = finish_fault(vmf);
-               if (!vmf_ret)
-                       vmf_ret = VM_FAULT_DONE_COW;
+               ret = finish_fault(vmf);
+               if (!ret)
+                       ret = VM_FAULT_DONE_COW;
                goto finish_iomap;
        }
 
@@ -1240,10 +1334,6 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 
                entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
                                                 0, write && !sync);
-               if (IS_ERR(entry)) {
-                       error = PTR_ERR(entry);
-                       goto error_finish_iomap;
-               }
 
                /*
                 * If we are doing synchronous page fault and inode needs fsync,
@@ -1257,23 +1347,20 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
                                goto error_finish_iomap;
                        }
                        *pfnp = pfn;
-                       vmf_ret = VM_FAULT_NEEDDSYNC | major;
+                       ret = VM_FAULT_NEEDDSYNC | major;
                        goto finish_iomap;
                }
                trace_dax_insert_mapping(inode, vmf, entry);
                if (write)
-                       error = vm_insert_mixed_mkwrite(vma, vaddr, pfn);
+                       ret = vmf_insert_mixed_mkwrite(vma, vaddr, pfn);
                else
-                       error = vm_insert_mixed(vma, vaddr, pfn);
+                       ret = vmf_insert_mixed(vma, vaddr, pfn);
 
-               /* -EBUSY is fine, somebody else faulted on the same PTE */
-               if (error == -EBUSY)
-                       error = 0;
-               break;
+               goto finish_iomap;
        case IOMAP_UNWRITTEN:
        case IOMAP_HOLE:
                if (!write) {
-                       vmf_ret = dax_load_hole(mapping, entry, vmf);
+                       ret = dax_load_hole(mapping, entry, vmf);
                        goto finish_iomap;
                }
                /*FALLTHRU*/
@@ -1284,12 +1371,12 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
        }
 
  error_finish_iomap:
-       vmf_ret = dax_fault_return(error) | major;
+       ret = dax_fault_return(error);
  finish_iomap:
        if (ops->iomap_end) {
                int copied = PAGE_SIZE;
 
-               if (vmf_ret & VM_FAULT_ERROR)
+               if (ret & VM_FAULT_ERROR)
                        copied = 0;
                /*
                 * The fault is done by now and there's no way back (other
@@ -1302,12 +1389,12 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
  unlock_entry:
        put_locked_mapping_entry(mapping, vmf->pgoff);
  out:
-       trace_dax_pte_fault_done(inode, vmf, vmf_ret);
-       return vmf_ret;
+       trace_dax_pte_fault_done(inode, vmf, ret);
+       return ret | major;
 }
 
 #ifdef CONFIG_FS_DAX_PMD
-static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
+static vm_fault_t dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
                void *entry)
 {
        struct address_space *mapping = vmf->vma->vm_file->f_mapping;
@@ -1327,8 +1414,6 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
        pfn = page_to_pfn_t(zero_page);
        ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
                        RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
-       if (IS_ERR(ret))
-               goto fallback;
 
        ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
        if (!pmd_none(*(vmf->pmd))) {
@@ -1348,7 +1433,7 @@ fallback:
        return VM_FAULT_FALLBACK;
 }
 
-static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
+static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
                               const struct iomap_ops *ops)
 {
        struct vm_area_struct *vma = vmf->vma;
@@ -1358,7 +1443,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
        bool sync;
        unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
        struct inode *inode = mapping->host;
-       int result = VM_FAULT_FALLBACK;
+       vm_fault_t result = VM_FAULT_FALLBACK;
        struct iomap iomap = { 0 };
        pgoff_t max_pgoff, pgoff;
        void *entry;
@@ -1450,8 +1535,6 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 
                entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
                                                RADIX_DAX_PMD, write && !sync);
-               if (IS_ERR(entry))
-                       goto finish_iomap;
 
                /*
                 * If we are doing synchronous page fault and inode needs fsync,
@@ -1509,7 +1592,7 @@ out:
        return result;
 }
 #else
-static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
+static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
                               const struct iomap_ops *ops)
 {
        return VM_FAULT_FALLBACK;
@@ -1529,7 +1612,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
  * has done all the necessary locking for page fault to proceed
  * successfully.
  */
-int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
+vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
                    pfn_t *pfnp, int *iomap_errp, const struct iomap_ops *ops)
 {
        switch (pe_size) {
@@ -1553,14 +1636,14 @@ EXPORT_SYMBOL_GPL(dax_iomap_fault);
  * DAX file.  It takes care of marking corresponding radix tree entry as dirty
  * as well.
  */
-static int dax_insert_pfn_mkwrite(struct vm_fault *vmf,
+static vm_fault_t dax_insert_pfn_mkwrite(struct vm_fault *vmf,
                                  enum page_entry_size pe_size,
                                  pfn_t pfn)
 {
        struct address_space *mapping = vmf->vma->vm_file->f_mapping;
        void *entry, **slot;
        pgoff_t index = vmf->pgoff;
-       int vmf_ret, error;
+       vm_fault_t ret;
 
        xa_lock_irq(&mapping->i_pages);
        entry = get_unlocked_mapping_entry(mapping, index, &slot);
@@ -1579,21 +1662,20 @@ static int dax_insert_pfn_mkwrite(struct vm_fault *vmf,
        xa_unlock_irq(&mapping->i_pages);
        switch (pe_size) {
        case PE_SIZE_PTE:
-               error = vm_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
-               vmf_ret = dax_fault_return(error);
+               ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
                break;
 #ifdef CONFIG_FS_DAX_PMD
        case PE_SIZE_PMD:
-               vmf_ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
+               ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
                        pfn, true);
                break;
 #endif
        default:
-               vmf_ret = VM_FAULT_FALLBACK;
+               ret = VM_FAULT_FALLBACK;
        }
        put_locked_mapping_entry(mapping, index);
-       trace_dax_insert_pfn_mkwrite(mapping->host, vmf, vmf_ret);
-       return vmf_ret;
+       trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret);
+       return ret;
 }
 
 /**
@@ -1606,8 +1688,8 @@ static int dax_insert_pfn_mkwrite(struct vm_fault *vmf,
  * stored persistently on the media and handles inserting of appropriate page
  * table entry.
  */
-int dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
-                         pfn_t pfn)
+vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
+               enum page_entry_size pe_size, pfn_t pfn)
 {
        int err;
        loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT;
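
For context, a filesystem ->huge_fault handler is expected to consume the synchronous-fault protocol roughly as below. This is a minimal sketch with hypothetical names (fs_dax_huge_fault, fs_iomap_ops); per-filesystem locking, journalling and sb_start_pagefault() handling are omitted.

	static vm_fault_t fs_dax_huge_fault(struct vm_fault *vmf,
			enum page_entry_size pe_size)
	{
		vm_fault_t ret;
		pfn_t pfn;

		ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, &fs_iomap_ops);
		/*
		 * A write fault on a MAP_SYNC mapping comes back as
		 * VM_FAULT_NEEDDSYNC with the pfn filled in;
		 * dax_finish_sync_fault() then persists the touched range
		 * and installs the writeable page table entry.
		 */
		if (ret & VM_FAULT_NEEDDSYNC)
			ret = dax_finish_sync_fault(vmf, pe_size, pfn);
		return ret;
	}
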