perf env: Conditionally compile BPF support code on having HAVE_LIBBPF_SUPPORT

diff --git a/mm/filemap.c b/mm/filemap.c
index 38546dc..d5e7c20 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -249,7 +249,7 @@ static void page_cache_free_page(struct address_space *mapping,
                freepage(page);
 
        if (PageTransHuge(page) && !PageHuge(page)) {
-               page_ref_sub(page, HPAGE_PMD_NR);
+               page_ref_sub(page, thp_nr_pages(page));
                VM_BUG_ON_PAGE(page_count(page) <= 0, page);
        } else {
                put_page(page);
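
Note on the hunk above: thp_nr_pages() takes the subpage count from the page itself, so the reference drop is correct for compound pages of any order, where the old code assumed a PMD-sized THP. A rough sketch of the idea (illustrative only, not the in-tree definition, which lives in <linux/huge_mm.h>):

/* Illustrative only: number of base pages a page cache entry spans. */
static inline long nr_subpages_sketch(struct page *page)
{
	/* compound_order() is 0 for a plain order-0 page */
	return 1L << compound_order(page);
}
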
@@ -827,15 +827,14 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 }
 EXPORT_SYMBOL_GPL(replace_page_cache_page);
 
-static int __add_to_page_cache_locked(struct page *page,
-                                     struct address_space *mapping,
-                                     pgoff_t offset, gfp_t gfp_mask,
-                                     void **shadowp)
+noinline int __add_to_page_cache_locked(struct page *page,
+                                       struct address_space *mapping,
+                                       pgoff_t offset, gfp_t gfp,
+                                       void **shadowp)
 {
        XA_STATE(xas, &mapping->i_pages, offset);
        int huge = PageHuge(page);
        int error;
-       void *old;
 
        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(PageSwapBacked(page), page);
@@ -846,25 +845,46 @@ static int __add_to_page_cache_locked(struct page *page,
        page->index = offset;
 
        if (!huge) {
-               error = mem_cgroup_charge(page, current->mm, gfp_mask);
+               error = mem_cgroup_charge(page, current->mm, gfp);
                if (error)
                        goto error;
        }
 
+       gfp &= GFP_RECLAIM_MASK;
+
        do {
+               unsigned int order = xa_get_order(xas.xa, xas.xa_index);
+               void *entry, *old = NULL;
+
+               if (order > thp_order(page))
+                       xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index),
+                                       order, gfp);
                xas_lock_irq(&xas);
-               old = xas_load(&xas);
-               if (old && !xa_is_value(old))
-                       xas_set_err(&xas, -EEXIST);
+               xas_for_each_conflict(&xas, entry) {
+                       old = entry;
+                       if (!xa_is_value(entry)) {
+                               xas_set_err(&xas, -EEXIST);
+                               goto unlock;
+                       }
+                       }
+
+               if (old) {
+                       if (shadowp)
+                               *shadowp = old;
+                       /* entry may have been split before we acquired lock */
+                       order = xa_get_order(xas.xa, xas.xa_index);
+                       if (order > thp_order(page)) {
+                               xas_split(&xas, old, order);
+                               xas_reset(&xas);
+                       }
+               }
+
                xas_store(&xas, page);
                if (xas_error(&xas))
                        goto unlock;
 
-               if (xa_is_value(old)) {
+               if (old)
                        mapping->nrexceptional--;
-                       if (shadowp)
-                               *shadowp = old;
-               }
                mapping->nrpages++;
 
                /* hugetlb pages do not participate in page cache accounting */
@@ -872,7 +892,7 @@ static int __add_to_page_cache_locked(struct page *page,
                        __inc_lruvec_page_state(page, NR_FILE_PAGES);
 unlock:
                xas_unlock_irq(&xas);
-       } while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK));
+       } while (xas_nomem(&xas, gfp));
 
        if (xas_error(&xas)) {
                error = xas_error(&xas);
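
The reworked loop above has to cope with a shadow entry that may occupy more than one slot: xas_for_each_conflict() picks up any existing value entry, and if that entry is larger than the page being added it is split so that only the slot at @offset is overwritten. A simplified sketch of that split-before-store pattern (assumes CONFIG_XARRAY_MULTI; the re-check under the lock and the error handling done by the real code are trimmed away):

/* Illustrative only; not the function in this patch. */
static void store_small_over_large(struct xarray *xa, unsigned long index,
				   void *new, gfp_t gfp)
{
	XA_STATE(xas, xa, index);		/* order-0 state: one slot */
	void *old = xa_load(xa, index);
	unsigned int order = xa_get_order(xa, index);

	if (order > 0)
		xas_split_alloc(&xas, old, order, gfp);	/* prealloc nodes outside the lock */
	xas_lock(&xas);
	if (order > 0)
		xas_split(&xas, old, order);	/* expand the old entry into per-slot copies */
	xas_store(&xas, new);			/* overwrite only the slot at @index */
	xas_unlock(&xas);
}
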
@@ -1425,7 +1445,7 @@ static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem
  * unlock_page - unlock a locked page
  * @page: the page
  *
- * Unlocks the page and wakes up sleepers in ___wait_on_page_locked().
+ * Unlocks the page and wakes up sleepers in wait_on_page_locked().
  * Also wakes sleepers in wait_on_page_writeback() because the wakeup
  * mechanism between PageLocked pages and PageWriteback pages is shared.
  * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
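
For context on the comment fix above: unlock_page() is the wake-up side of the page-lock wait queue, paired with wait_on_page_locked() (and lock_page()) on the sleeping side. An illustrative waiter, not taken from this patch:

static void wait_for_page_unlocked(struct page *page)
{
	/* sleeps until unlock_page() wakes the PG_locked wait queue */
	wait_on_page_locked(page);
}
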
@@ -2179,6 +2199,14 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
        last_index = (*ppos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
        offset = *ppos & ~PAGE_MASK;
 
+       /*
+        * If we've already successfully copied some data, then we
+        * can no longer safely return -EIOCBQUEUED. Hence mark
+        * an async read NOWAIT at that point.
+        */
+       if (written && (iocb->ki_flags & IOCB_WAITQ))
+               iocb->ki_flags |= IOCB_NOWAIT;
+
        for (;;) {
                struct page *page;
                pgoff_t end_index;
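
The added block above encodes a rule for async buffered reads: once some data has been copied to the caller, the read can no longer safely be punted with -EIOCBQUEUED, so the iocb is switched to IOCB_NOWAIT and any later blocking point fails fast with -EAGAIN instead. The tail of the function then reports the short count, in the spirit of this hypothetical helper:

/* Hypothetical helper: a partial read beats a late -EAGAIN. */
static ssize_t finish_buffered_read(ssize_t written, int error)
{
	return written ? written : error;
}
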
@@ -2568,8 +2596,8 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
        struct file *file = vmf->vma->vm_file;
        struct file_ra_state *ra = &file->f_ra;
        struct address_space *mapping = file->f_mapping;
+       DEFINE_READAHEAD(ractl, file, mapping, vmf->pgoff);
        struct file *fpin = NULL;
-       pgoff_t offset = vmf->pgoff;
        unsigned int mmap_miss;
 
        /* If we don't want any read-ahead, don't bother */
@@ -2580,8 +2608,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 
        if (vmf->vma->vm_flags & VM_SEQ_READ) {
                fpin = maybe_unlock_mmap_for_io(vmf, fpin);
-               page_cache_sync_readahead(mapping, ra, file, offset,
-                                         ra->ra_pages);
+               page_cache_sync_ra(&ractl, ra, ra->ra_pages);
                return fpin;
        }
 
@@ -2601,10 +2628,11 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
         * mmap read-around
         */
        fpin = maybe_unlock_mmap_for_io(vmf, fpin);
-       ra->start = max_t(long, 0, offset - ra->ra_pages / 2);
+       ra->start = max_t(long, 0, vmf->pgoff - ra->ra_pages / 2);
        ra->size = ra->ra_pages;
        ra->async_size = ra->ra_pages / 4;
-       ra_submit(ra, mapping, file);
+       ractl._index = ra->start;
+       do_page_cache_ra(&ractl, ra->size, ra->async_size);
        return fpin;
 }
 
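Both mmap readahead paths now drive readahead through a struct readahead_control built at the top of the function, matching the DEFINE_READAHEAD() line added in the first hunk of do_sync_mmap_readahead(). The old entry point can then be a thin wrapper; a sketch of its assumed shape (the real definitions are in <linux/pagemap.h>):

/* Assumed shape of the compatibility wrapper, for illustration only. */
static inline void page_cache_sync_readahead_sketch(struct address_space *mapping,
		struct file_ra_state *ra, struct file *file,
		pgoff_t index, unsigned long req_count)
{
	DEFINE_READAHEAD(ractl, file, mapping, index);
	page_cache_sync_ra(&ractl, ra, req_count);
}
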
@@ -2984,7 +3012,7 @@ filler:
                goto out;
 
        /*
-        * Page is not up to date and may be locked due one of the following
+        * Page is not up to date and may be locked due to one of the following
         * case a: Page is being filled and the page lock is held
         * case b: Read/write error clearing the page uptodate status
         * case c: Truncation in progress (page locked)
@@ -3093,228 +3121,6 @@ struct page *read_cache_page_gfp(struct address_space *mapping,
 }
 EXPORT_SYMBOL(read_cache_page_gfp);
 
-/*
- * Don't operate on ranges the page cache doesn't support, and don't exceed the
- * LFS limits.  If pos is under the limit it becomes a short access.  If it
- * exceeds the limit we return -EFBIG.
- */
-static int generic_write_check_limits(struct file *file, loff_t pos,
-                                     loff_t *count)
-{
-       struct inode *inode = file->f_mapping->host;
-       loff_t max_size = inode->i_sb->s_maxbytes;
-       loff_t limit = rlimit(RLIMIT_FSIZE);
-
-       if (limit != RLIM_INFINITY) {
-               if (pos >= limit) {
-                       send_sig(SIGXFSZ, current, 0);
-                       return -EFBIG;
-               }
-               *count = min(*count, limit - pos);
-       }
-
-       if (!(file->f_flags & O_LARGEFILE))
-               max_size = MAX_NON_LFS;
-
-       if (unlikely(pos >= max_size))
-               return -EFBIG;
-
-       *count = min(*count, max_size - pos);
-
-       return 0;
-}
-
-/*
- * Performs necessary checks before doing a write
- *
- * Can adjust writing position or amount of bytes to write.
- * Returns appropriate error code that caller should return or
- * zero in case that write should be allowed.
- */
-inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
-{
-       struct file *file = iocb->ki_filp;
-       struct inode *inode = file->f_mapping->host;
-       loff_t count;
-       int ret;
-
-       if (IS_SWAPFILE(inode))
-               return -ETXTBSY;
-
-       if (!iov_iter_count(from))
-               return 0;
-
-       /* FIXME: this is for backwards compatibility with 2.4 */
-       if (iocb->ki_flags & IOCB_APPEND)
-               iocb->ki_pos = i_size_read(inode);
-
-       if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
-               return -EINVAL;
-
-       count = iov_iter_count(from);
-       ret = generic_write_check_limits(file, iocb->ki_pos, &count);
-       if (ret)
-               return ret;
-
-       iov_iter_truncate(from, count);
-       return iov_iter_count(from);
-}
-EXPORT_SYMBOL(generic_write_checks);
-
-/*
- * Performs necessary checks before doing a clone.
- *
- * Can adjust amount of bytes to clone via @req_count argument.
- * Returns appropriate error code that caller should return or
- * zero in case the clone should be allowed.
- */
-int generic_remap_checks(struct file *file_in, loff_t pos_in,
-                        struct file *file_out, loff_t pos_out,
-                        loff_t *req_count, unsigned int remap_flags)
-{
-       struct inode *inode_in = file_in->f_mapping->host;
-       struct inode *inode_out = file_out->f_mapping->host;
-       uint64_t count = *req_count;
-       uint64_t bcount;
-       loff_t size_in, size_out;
-       loff_t bs = inode_out->i_sb->s_blocksize;
-       int ret;
-
-       /* The start of both ranges must be aligned to an fs block. */
-       if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs))
-               return -EINVAL;
-
-       /* Ensure offsets don't wrap. */
-       if (pos_in + count < pos_in || pos_out + count < pos_out)
-               return -EINVAL;
-
-       size_in = i_size_read(inode_in);
-       size_out = i_size_read(inode_out);
-
-       /* Dedupe requires both ranges to be within EOF. */
-       if ((remap_flags & REMAP_FILE_DEDUP) &&
-           (pos_in >= size_in || pos_in + count > size_in ||
-            pos_out >= size_out || pos_out + count > size_out))
-               return -EINVAL;
-
-       /* Ensure the infile range is within the infile. */
-       if (pos_in >= size_in)
-               return -EINVAL;
-       count = min(count, size_in - (uint64_t)pos_in);
-
-       ret = generic_write_check_limits(file_out, pos_out, &count);
-       if (ret)
-               return ret;
-
-       /*
-        * If the user wanted us to link to the infile's EOF, round up to the
-        * next block boundary for this check.
-        *
-        * Otherwise, make sure the count is also block-aligned, having
-        * already confirmed the starting offsets' block alignment.
-        */
-       if (pos_in + count == size_in) {
-               bcount = ALIGN(size_in, bs) - pos_in;
-       } else {
-               if (!IS_ALIGNED(count, bs))
-                       count = ALIGN_DOWN(count, bs);
-               bcount = count;
-       }
-
-       /* Don't allow overlapped cloning within the same file. */
-       if (inode_in == inode_out &&
-           pos_out + bcount > pos_in &&
-           pos_out < pos_in + bcount)
-               return -EINVAL;
-
-       /*
-        * We shortened the request but the caller can't deal with that, so
-        * bounce the request back to userspace.
-        */
-       if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN))
-               return -EINVAL;
-
-       *req_count = count;
-       return 0;
-}
-
-
-/*
- * Performs common checks before doing a file copy/clone
- * from @file_in to @file_out.
- */
-int generic_file_rw_checks(struct file *file_in, struct file *file_out)
-{
-       struct inode *inode_in = file_inode(file_in);
-       struct inode *inode_out = file_inode(file_out);
-
-       /* Don't copy dirs, pipes, sockets... */
-       if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
-               return -EISDIR;
-       if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
-               return -EINVAL;
-
-       if (!(file_in->f_mode & FMODE_READ) ||
-           !(file_out->f_mode & FMODE_WRITE) ||
-           (file_out->f_flags & O_APPEND))
-               return -EBADF;
-
-       return 0;
-}
-
-/*
- * Performs necessary checks before doing a file copy
- *
- * Can adjust amount of bytes to copy via @req_count argument.
- * Returns appropriate error code that caller should return or
- * zero in case the copy should be allowed.
- */
-int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
-                            struct file *file_out, loff_t pos_out,
-                            size_t *req_count, unsigned int flags)
-{
-       struct inode *inode_in = file_inode(file_in);
-       struct inode *inode_out = file_inode(file_out);
-       uint64_t count = *req_count;
-       loff_t size_in;
-       int ret;
-
-       ret = generic_file_rw_checks(file_in, file_out);
-       if (ret)
-               return ret;
-
-       /* Don't touch certain kinds of inodes */
-       if (IS_IMMUTABLE(inode_out))
-               return -EPERM;
-
-       if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
-               return -ETXTBSY;
-
-       /* Ensure offsets don't wrap. */
-       if (pos_in + count < pos_in || pos_out + count < pos_out)
-               return -EOVERFLOW;
-
-       /* Shorten the copy to EOF */
-       size_in = i_size_read(inode_in);
-       if (pos_in >= size_in)
-               count = 0;
-       else
-               count = min(count, size_in - (uint64_t)pos_in);
-
-       ret = generic_write_check_limits(file_out, pos_out, &count);
-       if (ret)
-               return ret;
-
-       /* Don't allow overlapped copying within the same file. */
-       if (inode_in == inode_out &&
-           pos_out + count > pos_in &&
-           pos_out < pos_in + count)
-               return -EINVAL;
-
-       *req_count = count;
-       return 0;
-}
-
 int pagecache_write_begin(struct file *file, struct address_space *mapping,
                                loff_t pos, unsigned len, unsigned flags,
                                struct page **pagep, void **fsdata)
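
The large block removed above carried the generic write, remap, and copy checks. Wherever those helpers end up, the caller-side pattern for generic_write_checks() is unchanged; for reference, a typical ->write_iter looks roughly like the hypothetical example below (foo_file_write_iter is made up; the helpers it calls are real):

/* Hypothetical ->write_iter showing the usual generic_write_checks() usage. */
static ssize_t foo_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	inode_lock(inode);
	ret = generic_write_checks(iocb, from);	/* clamp or reject the write */
	if (ret > 0)
		ret = __generic_file_write_iter(iocb, from);
	inode_unlock(inode);

	if (ret > 0)
		ret = generic_write_sync(iocb, ret);	/* honour O_SYNC/O_DSYNC */
	return ret;
}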