Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
author Linus Torvalds <torvalds@linux-foundation.org>
Sun, 15 Jun 2014 00:48:43 +0000 (19:48 -0500)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 15 Jun 2014 00:48:43 +0000 (19:48 -0500)
Pull more btrfs updates from Chris Mason:
 "This has a few fixes since our last pull and a new ioctl for doing
  btree searches from userland.  It's very similar to the existing
  ioctl, but lets us return larger items back down to the app"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  btrfs: fix error handling in create_pending_snapshot
  btrfs: fix use of uninit "ret" in end_extent_writepage()
  btrfs: free ulist in qgroup_shared_accounting() error path
  Btrfs: fix qgroups sanity test crash or hang
  btrfs: prevent RCU warning when dereferencing radix tree slot
  Btrfs: fix unfinished readahead thread for raid5/6 degraded mounting
  btrfs: new ioctl TREE_SEARCH_V2
  btrfs: tree_search, search_ioctl: direct copy to userspace
  btrfs: new function read_extent_buffer_to_user
  btrfs: tree_search, copy_to_sk: return needed size on EOVERFLOW
  btrfs: tree_search, copy_to_sk: return EOVERFLOW for too small buffer
  btrfs: tree_search, search_ioctl: accept varying buffer
  btrfs: tree_search: eliminate redundant nr_items check

1  2 
fs/btrfs/extent_io.c
fs/btrfs/ioctl.c

diff --combined fs/btrfs/extent_io.c
@@@ -2354,7 -2354,7 +2354,7 @@@ int end_extent_writepage(struct page *p
  {
        int uptodate = (err == 0);
        struct extent_io_tree *tree;
-       int ret;
+       int ret = 0;
  
        tree = &BTRFS_I(page->mapping->host)->io_tree;
  
@@@ -3523,7 -3523,7 +3523,7 @@@ lock_extent_buffer_for_io(struct extent
  static void end_extent_buffer_writeback(struct extent_buffer *eb)
  {
        clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
 -      smp_mb__after_clear_bit();
 +      smp_mb__after_atomic();
        wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
  }
  
@@@ -4576,8 -4576,7 +4576,8 @@@ static void check_buffer_tree_ref(struc
        spin_unlock(&eb->refs_lock);
  }
  
 -static void mark_extent_buffer_accessed(struct extent_buffer *eb)
 +static void mark_extent_buffer_accessed(struct extent_buffer *eb,
 +              struct page *accessed)
  {
        unsigned long num_pages, i;
  
        num_pages = num_extent_pages(eb->start, eb->len);
        for (i = 0; i < num_pages; i++) {
                struct page *p = extent_buffer_page(eb, i);
 -              mark_page_accessed(p);
 +              if (p != accessed)
 +                      mark_page_accessed(p);
        }
  }
  
@@@ -4601,7 -4599,7 +4601,7 @@@ struct extent_buffer *find_extent_buffe
                               start >> PAGE_CACHE_SHIFT);
        if (eb && atomic_inc_not_zero(&eb->refs)) {
                rcu_read_unlock();
 -              mark_extent_buffer_accessed(eb);
 +              mark_extent_buffer_accessed(eb, NULL);
                return eb;
        }
        rcu_read_unlock();
@@@ -4696,7 -4694,7 +4696,7 @@@ struct extent_buffer *alloc_extent_buff
                                spin_unlock(&mapping->private_lock);
                                unlock_page(p);
                                page_cache_release(p);
 -                              mark_extent_buffer_accessed(exists);
 +                              mark_extent_buffer_accessed(exists, p);
                                goto free_eb;
                        }
  
                attach_extent_buffer_page(eb, p);
                spin_unlock(&mapping->private_lock);
                WARN_ON(PageDirty(p));
 -              mark_page_accessed(p);
                eb->pages[i] = p;
                if (!PageUptodate(p))
                        uptodate = 0;
@@@ -5068,6 -5067,43 +5068,43 @@@ void read_extent_buffer(struct extent_b
        }
  }
  
+ int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
+                       unsigned long start,
+                       unsigned long len)
+ {
+       size_t cur;
+       size_t offset;
+       struct page *page;
+       char *kaddr;
+       char __user *dst = (char __user *)dstv;
+       size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+       unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+       int ret = 0;
+       WARN_ON(start > eb->len);
+       WARN_ON(start + len > eb->start + eb->len);
+       offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+       while (len > 0) {
+               page = extent_buffer_page(eb, i);
+               cur = min(len, (PAGE_CACHE_SIZE - offset));
+               kaddr = page_address(page);
+               if (copy_to_user(dst, kaddr + offset, cur)) {
+                       ret = -EFAULT;
+                       break;
+               }
+               dst += cur;
+               len -= cur;
+               offset = 0;
+               i++;
+       }
+       return ret;
+ }
  int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
                               unsigned long min_len, char **map,
                               unsigned long *map_start,
diff --combined fs/btrfs/ioctl.c
@@@ -643,7 -643,7 +643,7 @@@ static int create_snapshot(struct btrfs
                return -EINVAL;
  
        atomic_inc(&root->will_be_snapshoted);
 -      smp_mb__after_atomic_inc();
 +      smp_mb__after_atomic();
        btrfs_wait_nocow_write(root);
  
        ret = btrfs_start_delalloc_inodes(root, 0);
@@@ -1957,7 -1957,8 +1957,8 @@@ static noinline int copy_to_sk(struct b
                               struct btrfs_path *path,
                               struct btrfs_key *key,
                               struct btrfs_ioctl_search_key *sk,
-                              char *buf,
+                              size_t *buf_size,
+                              char __user *ubuf,
                               unsigned long *sk_offset,
                               int *num_found)
  {
                if (!key_in_sk(key, sk))
                        continue;
  
-               if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
+               if (sizeof(sh) + item_len > *buf_size) {
+                       if (*num_found) {
+                               ret = 1;
+                               goto out;
+                       }
+                       /*
+                        * return one empty item back for v1, which does not
+                        * handle -EOVERFLOW
+                        */
+                       *buf_size = sizeof(sh) + item_len;
                        item_len = 0;
+                       ret = -EOVERFLOW;
+               }
  
-               if (sizeof(sh) + item_len + *sk_offset >
-                   BTRFS_SEARCH_ARGS_BUFSIZE) {
+               if (sizeof(sh) + item_len + *sk_offset > *buf_size) {
                        ret = 1;
-                       goto overflow;
+                       goto out;
                }
  
                sh.objectid = key->objectid;
                sh.transid = found_transid;
  
                /* copy search result header */
-               memcpy(buf + *sk_offset, &sh, sizeof(sh));
+               if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) {
+                       ret = -EFAULT;
+                       goto out;
+               }
                *sk_offset += sizeof(sh);
  
                if (item_len) {
-                       char *p = buf + *sk_offset;
+                       char __user *up = ubuf + *sk_offset;
                        /* copy the item */
-                       read_extent_buffer(leaf, p,
-                                          item_off, item_len);
+                       if (read_extent_buffer_to_user(leaf, up,
+                                                      item_off, item_len)) {
+                               ret = -EFAULT;
+                               goto out;
+                       }
                        *sk_offset += item_len;
                }
                (*num_found)++;
  
-               if (*num_found >= sk->nr_items)
-                       break;
+               if (ret) /* -EOVERFLOW from above */
+                       goto out;
+               if (*num_found >= sk->nr_items) {
+                       ret = 1;
+                       goto out;
+               }
        }
  advance_key:
        ret = 0;
                key->objectid++;
        } else
                ret = 1;
- overflow:
+ out:
+       /*
+        *  0: all items from this leaf copied, continue with next
+        *  1: * more items can be copied, but unused buffer is too small
+        *     * all items were found
+        *     Either way, it will stop the loop which iterates to the next
+        *     leaf
+        *  -EOVERFLOW: item was too large for buffer
+        *  -EFAULT: could not copy extent buffer back to userspace
+        */
        return ret;
  }
  
  static noinline int search_ioctl(struct inode *inode,
-                                struct btrfs_ioctl_search_args *args)
+                                struct btrfs_ioctl_search_key *sk,
+                                size_t *buf_size,
+                                char __user *ubuf)
  {
        struct btrfs_root *root;
        struct btrfs_key key;
        struct btrfs_path *path;
-       struct btrfs_ioctl_search_key *sk = &args->key;
        struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;
        int ret;
        int num_found = 0;
        unsigned long sk_offset = 0;
  
+       if (*buf_size < sizeof(struct btrfs_ioctl_search_header)) {
+               *buf_size = sizeof(struct btrfs_ioctl_search_header);
+               return -EOVERFLOW;
+       }
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
                                ret = 0;
                        goto err;
                }
-               ret = copy_to_sk(root, path, &key, sk, args->buf,
+               ret = copy_to_sk(root, path, &key, sk, buf_size, ubuf,
                                 &sk_offset, &num_found);
                btrfs_release_path(path);
-               if (ret || num_found >= sk->nr_items)
+               if (ret)
                        break;
  
        }
-       ret = 0;
+       if (ret > 0)
+               ret = 0;
  err:
        sk->nr_items = num_found;
        btrfs_free_path(path);
  static noinline int btrfs_ioctl_tree_search(struct file *file,
                                           void __user *argp)
  {
-        struct btrfs_ioctl_search_args *args;
-        struct inode *inode;
-        int ret;
+       struct btrfs_ioctl_search_args __user *uargs;
+       struct btrfs_ioctl_search_key sk;
+       struct inode *inode;
+       int ret;
+       size_t buf_size;
  
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
  
-       args = memdup_user(argp, sizeof(*args));
-       if (IS_ERR(args))
-               return PTR_ERR(args);
+       uargs = (struct btrfs_ioctl_search_args __user *)argp;
+       if (copy_from_user(&sk, &uargs->key, sizeof(sk)))
+               return -EFAULT;
+       buf_size = sizeof(uargs->buf);
  
        inode = file_inode(file);
-       ret = search_ioctl(inode, args);
-       if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
+       ret = search_ioctl(inode, &sk, &buf_size, uargs->buf);
+       /*
+        * In the original implementation an overflow is handled by returning a
+        * search header with a len of zero, so reset ret.
+        */
+       if (ret == -EOVERFLOW)
+               ret = 0;
+       if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk)))
                ret = -EFAULT;
-       kfree(args);
+       return ret;
+ }
+ static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
+                                              void __user *argp)
+ {
+       struct btrfs_ioctl_search_args_v2 __user *uarg;
+       struct btrfs_ioctl_search_args_v2 args;
+       struct inode *inode;
+       int ret;
+       size_t buf_size;
+       const size_t buf_limit = 16 * 1024 * 1024;
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       /* copy search header and buffer size */
+       uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp;
+       if (copy_from_user(&args, uarg, sizeof(args)))
+               return -EFAULT;
+       buf_size = args.buf_size;
+       if (buf_size < sizeof(struct btrfs_ioctl_search_header))
+               return -EOVERFLOW;
+       /* limit result size to 16MB */
+       if (buf_size > buf_limit)
+               buf_size = buf_limit;
+       inode = file_inode(file);
+       ret = search_ioctl(inode, &args.key, &buf_size,
+                          (char *)(&uarg->buf[0]));
+       if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
+               ret = -EFAULT;
+       else if (ret == -EOVERFLOW &&
+               copy_to_user(&uarg->buf_size, &buf_size, sizeof(buf_size)))
+               ret = -EFAULT;
        return ret;
  }
  
@@@ -5198,6 -5291,8 +5291,8 @@@ long btrfs_ioctl(struct file *file, uns
                return btrfs_ioctl_trans_end(file);
        case BTRFS_IOC_TREE_SEARCH:
                return btrfs_ioctl_tree_search(file, argp);
+       case BTRFS_IOC_TREE_SEARCH_V2:
+               return btrfs_ioctl_tree_search_v2(file, argp);
        case BTRFS_IOC_INO_LOOKUP:
                return btrfs_ioctl_ino_lookup(file, argp);
        case BTRFS_IOC_INO_PATHS: