introduce copy_page_to_iter, kill loop over iovec in generic_file_aio_read()
authorAl Viro <viro@zeniv.linux.org.uk>
Mon, 3 Feb 2014 22:07:03 +0000 (17:07 -0500)
committerAl Viro <viro@zeniv.linux.org.uk>
Wed, 2 Apr 2014 03:19:21 +0000 (23:19 -0400)
generic_file_aio_read() was looping over the target iovec, with loop over
(source) pages nested inside that.  Just set an iov_iter up and pass *that*
to do_generic_file_aio_read().  With copy_page_to_iter() doing all work
of mapping and copying a page to iovec and advancing iov_iter.

Switch shmem_file_aio_read() to the same and kill file_read_actor(), while
we are at it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
include/linux/fs.h
include/linux/uio.h
mm/filemap.c
mm/shmem.c

index 2261ac8..2a5b174 100644 (file)
@@ -2390,7 +2390,6 @@ extern int generic_file_mmap(struct file *, struct vm_area_struct *);
 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
 extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
                unsigned long size, pgoff_t pgoff);
-extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
 extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
 extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long,
index 347d70c..199bcc3 100644 (file)
@@ -67,6 +67,8 @@ size_t iov_iter_copy_from_user(struct page *page,
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
+size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
+                        struct iov_iter *i);
 
 static inline void iov_iter_init(struct iov_iter *i,
                        const struct iovec *iov, unsigned long nr_segs,
index bfb7a97..a16eb2c 100644 (file)
@@ -1085,11 +1085,90 @@ static void shrink_readahead_size_eio(struct file *filp,
        ra->ra_pages /= 4;
 }
 
+size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
+                        struct iov_iter *i)
+{
+       size_t skip, copy, left, wanted;
+       const struct iovec *iov;
+       char __user *buf;
+       void *kaddr, *from;
+
+       if (unlikely(bytes > i->count))
+               bytes = i->count;
+
+       if (unlikely(!bytes))
+               return 0;
+
+       wanted = bytes;
+       iov = i->iov;
+       skip = i->iov_offset;
+       buf = iov->iov_base + skip;
+       copy = min(bytes, iov->iov_len - skip);
+
+       if (!fault_in_pages_writeable(buf, copy)) {
+               kaddr = kmap_atomic(page);
+               from = kaddr + offset;
+
+               /* first chunk, usually the only one */
+               left = __copy_to_user_inatomic(buf, from, copy);
+               copy -= left;
+               skip += copy;
+               from += copy;
+               bytes -= copy;
+
+               while (unlikely(!left && bytes)) {
+                       iov++;
+                       buf = iov->iov_base;
+                       copy = min(bytes, iov->iov_len);
+                       left = __copy_to_user_inatomic(buf, from, copy);
+                       copy -= left;
+                       skip = copy;
+                       from += copy;
+                       bytes -= copy;
+               }
+               if (likely(!bytes)) {
+                       kunmap_atomic(kaddr);
+                       goto done;
+               }
+               offset = from - kaddr;
+               buf += copy;
+               kunmap_atomic(kaddr);
+               copy = min(bytes, iov->iov_len - skip);
+       }
+       /* Too bad - revert to non-atomic kmap */
+       kaddr = kmap(page);
+       from = kaddr + offset;
+       left = __copy_to_user(buf, from, copy);
+       copy -= left;
+       skip += copy;
+       from += copy;
+       bytes -= copy;
+       while (unlikely(!left && bytes)) {
+               iov++;
+               buf = iov->iov_base;
+               copy = min(bytes, iov->iov_len);
+               left = __copy_to_user(buf, from, copy);
+               copy -= left;
+               skip = copy;
+               from += copy;
+               bytes -= copy;
+       }
+       kunmap(page);
+done:
+       i->count -= wanted - bytes;
+       i->nr_segs -= iov - i->iov;
+       i->iov = iov;
+       i->iov_offset = skip;
+       return wanted - bytes;
+}
+EXPORT_SYMBOL(copy_page_to_iter);
+
 /**
  * do_generic_file_read - generic file read routine
  * @filp:      the file to read
  * @ppos:      current file position
- * @desc:      read_descriptor
+ * @iter:      data destination
+ * @written:   already copied
  *
  * This is a generic file read routine, and uses the
  * mapping->a_ops->readpage() function for the actual low-level stuff.
@@ -1097,8 +1176,8 @@ static void shrink_readahead_size_eio(struct file *filp,
  * This is really ugly. But the goto's actually try to clarify some
  * of the logic when it comes to error handling etc.
  */
-static void do_generic_file_read(struct file *filp, loff_t *ppos,
-               read_descriptor_t *desc)
+static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
+               struct iov_iter *iter, ssize_t written)
 {
        struct address_space *mapping = filp->f_mapping;
        struct inode *inode = mapping->host;
@@ -1108,12 +1187,12 @@ static void do_generic_file_read(struct file *filp, loff_t *ppos,
        pgoff_t prev_index;
        unsigned long offset;      /* offset into pagecache page */
        unsigned int prev_offset;
-       int error;
+       int error = 0;
 
        index = *ppos >> PAGE_CACHE_SHIFT;
        prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
        prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
-       last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
+       last_index = (*ppos + iter->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
        offset = *ppos & ~PAGE_CACHE_MASK;
 
        for (;;) {
@@ -1148,7 +1227,7 @@ find_page:
                        if (!page->mapping)
                                goto page_not_up_to_date_locked;
                        if (!mapping->a_ops->is_partially_uptodate(page,
-                                                       offset, desc->count))
+                                                       offset, iter->count))
                                goto page_not_up_to_date_locked;
                        unlock_page(page);
                }
@@ -1198,24 +1277,23 @@ page_ok:
                /*
                 * Ok, we have the page, and it's up-to-date, so
                 * now we can copy it to user space...
-                *
-                * The file_read_actor routine returns how many bytes were
-                * actually used..
-                * NOTE! This may not be the same as how much of a user buffer
-                * we filled up (we may be padding etc), so we can only update
-                * "pos" here (the actor routine has to update the user buffer
-                * pointers and the remaining count).
                 */
-               ret = file_read_actor(desc, page, offset, nr);
+
+               ret = copy_page_to_iter(page, offset, nr, iter);
                offset += ret;
                index += offset >> PAGE_CACHE_SHIFT;
                offset &= ~PAGE_CACHE_MASK;
                prev_offset = offset;
 
                page_cache_release(page);
-               if (ret == nr && desc->count)
-                       continue;
-               goto out;
+               written += ret;
+               if (!iov_iter_count(iter))
+                       goto out;
+               if (ret < nr) {
+                       error = -EFAULT;
+                       goto out;
+               }
+               continue;
 
 page_not_up_to_date:
                /* Get exclusive access to the page ... */
@@ -1250,6 +1328,7 @@ readpage:
                if (unlikely(error)) {
                        if (error == AOP_TRUNCATED_PAGE) {
                                page_cache_release(page);
+                               error = 0;
                                goto find_page;
                        }
                        goto readpage_error;
@@ -1280,7 +1359,6 @@ readpage:
 
 readpage_error:
                /* UHHUH! A synchronous read error occurred. Report it */
-               desc->error = error;
                page_cache_release(page);
                goto out;
 
@@ -1291,16 +1369,17 @@ no_cached_page:
                 */
                page = page_cache_alloc_cold(mapping);
                if (!page) {
-                       desc->error = -ENOMEM;
+                       error = -ENOMEM;
                        goto out;
                }
                error = add_to_page_cache_lru(page, mapping,
                                                index, GFP_KERNEL);
                if (error) {
                        page_cache_release(page);
-                       if (error == -EEXIST)
+                       if (error == -EEXIST) {
+                               error = 0;
                                goto find_page;
-                       desc->error = error;
+                       }
                        goto out;
                }
                goto readpage;
@@ -1313,44 +1392,7 @@ out:
 
        *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
        file_accessed(filp);
-}
-
-int file_read_actor(read_descriptor_t *desc, struct page *page,
-                       unsigned long offset, unsigned long size)
-{
-       char *kaddr;
-       unsigned long left, count = desc->count;
-
-       if (size > count)
-               size = count;
-
-       /*
-        * Faults on the destination of a read are common, so do it before
-        * taking the kmap.
-        */
-       if (!fault_in_pages_writeable(desc->arg.buf, size)) {
-               kaddr = kmap_atomic(page);
-               left = __copy_to_user_inatomic(desc->arg.buf,
-                                               kaddr + offset, size);
-               kunmap_atomic(kaddr);
-               if (left == 0)
-                       goto success;
-       }
-
-       /* Do it the slow way */
-       kaddr = kmap(page);
-       left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
-       kunmap(page);
-
-       if (left) {
-               size -= left;
-               desc->error = -EFAULT;
-       }
-success:
-       desc->count = count - size;
-       desc->written += size;
-       desc->arg.buf += size;
-       return size;
+       return written ? written : error;
 }
 
 /*
@@ -1408,14 +1450,15 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 {
        struct file *filp = iocb->ki_filp;
        ssize_t retval;
-       unsigned long seg = 0;
        size_t count;
        loff_t *ppos = &iocb->ki_pos;
+       struct iov_iter i;
 
        count = 0;
        retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
        if (retval)
                return retval;
+       iov_iter_init(&i, iov, nr_segs, count, 0);
 
        /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
        if (filp->f_flags & O_DIRECT) {
@@ -1437,6 +1480,11 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
                if (retval > 0) {
                        *ppos = pos + retval;
                        count -= retval;
+                       /*
+                        * If we did a short DIO read we need to skip the
+                        * section of the iov that we've already read data into.
+                        */
+                       iov_iter_advance(&i, retval);
                }
 
                /*
@@ -1453,39 +1501,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
                }
        }
 
-       count = retval;
-       for (seg = 0; seg < nr_segs; seg++) {
-               read_descriptor_t desc;
-               loff_t offset = 0;
-
-               /*
-                * If we did a short DIO read we need to skip the section of the
-                * iov that we've already read data into.
-                */
-               if (count) {
-                       if (count > iov[seg].iov_len) {
-                               count -= iov[seg].iov_len;
-                               continue;
-                       }
-                       offset = count;
-                       count = 0;
-               }
-
-               desc.written = 0;
-               desc.arg.buf = iov[seg].iov_base + offset;
-               desc.count = iov[seg].iov_len - offset;
-               if (desc.count == 0)
-                       continue;
-               desc.error = 0;
-               do_generic_file_read(filp, ppos, &desc);
-               retval += desc.written;
-               if (desc.error) {
-                       retval = retval ?: desc.error;
-                       break;
-               }
-               if (desc.count > 0)
-                       break;
-       }
+       retval = do_generic_file_read(filp, ppos, &i, retval);
 out:
        return retval;
 }
index 9398e6c..17d3799 100644 (file)
@@ -1462,13 +1462,25 @@ shmem_write_end(struct file *file, struct address_space *mapping,
        return copied;
 }
 
-static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc)
+static ssize_t shmem_file_aio_read(struct kiocb *iocb,
+               const struct iovec *iov, unsigned long nr_segs, loff_t pos)
 {
-       struct inode *inode = file_inode(filp);
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file_inode(file);
        struct address_space *mapping = inode->i_mapping;
        pgoff_t index;
        unsigned long offset;
        enum sgp_type sgp = SGP_READ;
+       int error;
+       ssize_t retval;
+       size_t count;
+       loff_t *ppos = &iocb->ki_pos;
+       struct iov_iter iter;
+
+       retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
+       if (retval)
+               return retval;
+       iov_iter_init(&iter, iov, nr_segs, count, 0);
 
        /*
         * Might this read be for a stacking filesystem?  Then when reading
@@ -1496,10 +1508,10 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
                                break;
                }
 
-               desc->error = shmem_getpage(inode, index, &page, sgp, NULL);
-               if (desc->error) {
-                       if (desc->error == -EINVAL)
-                               desc->error = 0;
+               error = shmem_getpage(inode, index, &page, sgp, NULL);
+               if (error) {
+                       if (error == -EINVAL)
+                               error = 0;
                        break;
                }
                if (page)
@@ -1543,61 +1555,26 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
                /*
                 * Ok, we have the page, and it's up-to-date, so
                 * now we can copy it to user space...
-                *
-                * The actor routine returns how many bytes were actually used..
-                * NOTE! This may not be the same as how much of a user buffer
-                * we filled up (we may be padding etc), so we can only update
-                * "pos" here (the actor routine has to update the user buffer
-                * pointers and the remaining count).
                 */
-               ret = file_read_actor(desc, page, offset, nr);
+               ret = copy_page_to_iter(page, offset, nr, &iter);
+               retval += ret;
                offset += ret;
                index += offset >> PAGE_CACHE_SHIFT;
                offset &= ~PAGE_CACHE_MASK;
 
                page_cache_release(page);
-               if (ret != nr || !desc->count)
+               if (!iov_iter_count(&iter))
                        break;
-
+               if (ret < nr) {
+                       error = -EFAULT;
+                       break;
+               }
                cond_resched();
        }
 
        *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
-       file_accessed(filp);
-}
-
-static ssize_t shmem_file_aio_read(struct kiocb *iocb,
-               const struct iovec *iov, unsigned long nr_segs, loff_t pos)
-{
-       struct file *filp = iocb->ki_filp;
-       ssize_t retval;
-       unsigned long seg;
-       size_t count;
-       loff_t *ppos = &iocb->ki_pos;
-
-       retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
-       if (retval)
-               return retval;
-
-       for (seg = 0; seg < nr_segs; seg++) {
-               read_descriptor_t desc;
-
-               desc.written = 0;
-               desc.arg.buf = iov[seg].iov_base;
-               desc.count = iov[seg].iov_len;
-               if (desc.count == 0)
-                       continue;
-               desc.error = 0;
-               do_shmem_file_read(filp, ppos, &desc);
-               retval += desc.written;
-               if (desc.error) {
-                       retval = retval ?: desc.error;
-                       break;
-               }
-               if (desc.count > 0)
-                       break;
-       }
-       return retval;
+       file_accessed(file);
+       return retval ? retval : error;
 }
 
 static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,