mm/hugetlbfs: unmap pages if page fault raced with hole punch
[linux-2.6-microblaze.git] / fs / read_write.c
index c81ef39..06b07d5 100644 (file)
@@ -171,6 +171,45 @@ loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t si
 }
 EXPORT_SYMBOL(fixed_size_llseek);
 
+/**
+ * no_seek_end_llseek - llseek implementation that refuses SEEK_END
+ * @file:      file structure to seek on
+ * @offset:    file offset to seek to
+ * @whence:    type of seek
+ *
+ * Only SEEK_SET and SEEK_CUR are accepted; both are handed to
+ * generic_file_llseek_size() with the largest positive file offset as the
+ * limit and 0 as the end-of-file position.  Any other @whence value is
+ * rejected with -EINVAL.
+ */
+loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
+{
+       switch (whence) {
+       case SEEK_SET: case SEEK_CUR:
+               /*
+                * The limit is a signed loff_t.  Passing ~0ULL here would be
+                * converted to -1, so every non-negative target offset would
+                * compare as "beyond the limit" and the seek could never
+                * succeed.  OFFSET_MAX is the intended "no limit" value.
+                */
+               return generic_file_llseek_size(file, offset, whence,
+                                               OFFSET_MAX, 0);
+       default:
+               return -EINVAL;
+       }
+}
+EXPORT_SYMBOL(no_seek_end_llseek);
+
+/**
+ * no_seek_end_llseek_size - size-bounded llseek that refuses SEEK_END
+ * @file:      file structure to seek on
+ * @offset:    file offset to seek to
+ * @whence:    type of seek
+ * @size:      maximal offset allowed
+ *
+ * SEEK_SET and SEEK_CUR are forwarded to generic_file_llseek_size() with
+ * @size as the limit and 0 as the end-of-file position.  Every other
+ * @whence value returns -EINVAL.
+ */
+loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size)
+{
+       /* Anything other than an absolute or relative seek is refused. */
+       if (whence != SEEK_SET && whence != SEEK_CUR)
+               return -EINVAL;
+
+       return generic_file_llseek_size(file, offset, whence, size, 0);
+}
+EXPORT_SYMBOL(no_seek_end_llseek_size);
+
 /**
  * noop_llseek - No Operation Performed llseek implementation
  * @file:      file structure to seek on
@@ -396,9 +435,8 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
        }
 
        if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
-               retval = locks_mandatory_area(
-                       read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
-                       inode, file, pos, count);
+               retval = locks_mandatory_area(inode, file, pos, pos + count - 1,
+                               read_write == READ ? F_RDLCK : F_WRLCK);
                if (retval < 0)
                        return retval;
        }
@@ -1452,3 +1490,175 @@ out1:
 out2:
        return ret;
 }
+
+/*
+ * clone_verify_area - validate one side of a clone/dedupe byte range
+ * @file:      file the range refers to
+ * @pos:       starting offset of the range
+ * @len:       length of the range; 0 is treated as "up to OFFSET_MAX" when
+ *             checking mandatory locks
+ * @write:     true if the range will be written, false if it is only read
+ *
+ * Returns -EINVAL if @pos is negative or the range would end beyond
+ * OFFSET_MAX, a negative error from locks_mandatory_area() if a mandatory
+ * lock conflicts, and otherwise the result of security_file_permission().
+ */
+static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
+{
+       struct inode *inode = file_inode(file);
+
+       if (unlikely(pos < 0))
+               return -EINVAL;
+
+       /*
+        * pos is non-negative here, so OFFSET_MAX - pos cannot underflow.
+        * Comparing len against the remaining room rejects every range that
+        * would end past OFFSET_MAX, including lengths so large that
+        * pos + len wraps around in unsigned 64-bit arithmetic and would
+        * otherwise look like a small, valid end offset.
+        */
+       if (unlikely(len > (u64)(OFFSET_MAX - pos)))
+               return -EINVAL;
+
+       if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
+               /* The lock range is inclusive; len == 0 covers everything. */
+               loff_t end = len ? pos + len - 1 : OFFSET_MAX;
+               int retval;
+
+               retval = locks_mandatory_area(inode, file, pos, end,
+                               write ? F_WRLCK : F_RDLCK);
+               if (retval < 0)
+                       return retval;
+       }
+
+       return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
+}
+
+/**
+ * vfs_clone_file_range - validate and dispatch a clone request
+ * @file_in:   file the data is cloned from
+ * @pos_in:    starting offset in @file_in
+ * @file_out:  file the data is cloned into
+ * @pos_out:   starting offset in @file_out
+ * @len:       number of bytes to clone
+ *
+ * Returns -EXDEV if the files are on different superblocks or mounts,
+ * -EISDIR if either is a directory, -EINVAL if either is not a regular file
+ * or the source range extends past the source's size, and -EBADF if the
+ * source is not readable, the destination is not writable or is O_APPEND,
+ * or the source has no ->clone_file_range method.  Otherwise the result of
+ * the range checks, of mnt_want_write_file(), or of the ->clone_file_range
+ * method itself is returned.
+ */
+int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+               struct file *file_out, loff_t pos_out, u64 len)
+{
+       struct inode *src = file_inode(file_in);
+       struct inode *dst = file_inode(file_out);
+       int err;
+
+       /* Both files must live on the same superblock and the same mount. */
+       if (src->i_sb != dst->i_sb)
+               return -EXDEV;
+       if (file_in->f_path.mnt != file_out->f_path.mnt)
+               return -EXDEV;
+
+       /* Only regular files can be cloned; directories get their own errno. */
+       if (S_ISDIR(src->i_mode) || S_ISDIR(dst->i_mode))
+               return -EISDIR;
+       if (!(S_ISREG(src->i_mode) && S_ISREG(dst->i_mode)))
+               return -EINVAL;
+
+       /* Access-mode and capability checks all report -EBADF. */
+       if (!(file_in->f_mode & FMODE_READ))
+               return -EBADF;
+       if (!(file_out->f_mode & FMODE_WRITE))
+               return -EBADF;
+       if (file_out->f_flags & O_APPEND)
+               return -EBADF;
+       if (!file_in->f_op->clone_file_range)
+               return -EBADF;
+
+       err = clone_verify_area(file_in, pos_in, len, false);
+       if (err)
+               return err;
+
+       err = clone_verify_area(file_out, pos_out, len, true);
+       if (err)
+               return err;
+
+       /* The source range must lie entirely within the source file. */
+       if (pos_in + len > i_size_read(src))
+               return -EINVAL;
+
+       err = mnt_want_write_file(file_out);
+       if (err)
+               return err;
+
+       err = file_in->f_op->clone_file_range(file_in, pos_in,
+                                             file_out, pos_out, len);
+       if (err == 0) {
+               fsnotify_access(file_in);
+               fsnotify_modify(file_out);
+       }
+
+       /* Write access is dropped whether or not the clone succeeded. */
+       mnt_drop_write_file(file_out);
+       return err;
+}
+EXPORT_SYMBOL(vfs_clone_file_range);
+
+/**
+ * vfs_dedupe_file_range - dedupe one source range against several files
+ * @file:      source file
+ * @same:      request describing the source range and the destinations;
+ *             each destination's outcome is written back into @same->info[]
+ *
+ * Returns -EINVAL if @file is not readable, a reserved field of @same is
+ * set, or the source is not a regular file; -EISDIR if the source is a
+ * directory; or a negative error from clone_verify_area() on the source
+ * range.  Once the source has been accepted the function returns 0 and
+ * failures that concern a single destination are reported only through
+ * that destination's @status field, so the return value does not depend on
+ * which destination happens to be processed last.
+ */
+int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
+{
+       struct file_dedupe_range_info *info;
+       struct inode *src = file_inode(file);
+       u64 off;
+       u64 len;
+       int i;
+       int ret;
+       bool is_admin = capable(CAP_SYS_ADMIN);
+       u16 count = same->dest_count;
+
+       if (!(file->f_mode & FMODE_READ))
+               return -EINVAL;
+
+       if (same->reserved1 || same->reserved2)
+               return -EINVAL;
+
+       off = same->src_offset;
+       len = same->src_length;
+
+       if (S_ISDIR(src->i_mode))
+               return -EISDIR;
+
+       if (!S_ISREG(src->i_mode))
+               return -EINVAL;
+
+       ret = clone_verify_area(file, off, len, false);
+       if (ret < 0)
+               return ret;
+
+       /* pre-format output fields to sane values */
+       for (i = 0; i < count; i++) {
+               same->info[i].bytes_deduped = 0ULL;
+               same->info[i].status = FILE_DEDUPE_RANGE_SAME;
+       }
+
+       for (i = 0, info = same->info; i < count; i++, info++) {
+               struct fd dst_fd = fdget(info->dest_fd);
+               struct file *dst_file = dst_fd.file;
+               struct inode *dst;
+               ssize_t deduped;
+               /*
+                * Per-destination error.  Kept separate from the function's
+                * return value so that a failure on one destination is not
+                * returned to the caller in place of the per-entry statuses.
+                */
+               int err;
+
+               if (!dst_file) {
+                       info->status = -EBADF;
+                       goto next_loop;
+               }
+               dst = file_inode(dst_file);
+
+               err = mnt_want_write_file(dst_file);
+               if (err) {
+                       info->status = err;
+                       goto next_loop;
+               }
+
+               err = clone_verify_area(dst_file, info->dest_offset, len, true);
+               if (err < 0) {
+                       info->status = err;
+                       goto next_file;
+               }
+
+               if (info->reserved) {
+                       info->status = -EINVAL;
+               } else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
+                       info->status = -EINVAL;
+               } else if (file->f_path.mnt != dst_file->f_path.mnt) {
+                       info->status = -EXDEV;
+               } else if (S_ISDIR(dst->i_mode)) {
+                       info->status = -EISDIR;
+               } else if (dst_file->f_op->dedupe_file_range == NULL) {
+                       info->status = -EINVAL;
+               } else {
+                       deduped = dst_file->f_op->dedupe_file_range(file, off,
+                                                       len, dst_file,
+                                                       info->dest_offset);
+                       /* -EBADE is reported as "contents differ", not as an error. */
+                       if (deduped == -EBADE)
+                               info->status = FILE_DEDUPE_RANGE_DIFFERS;
+                       else if (deduped < 0)
+                               info->status = deduped;
+                       else
+                               info->bytes_deduped += deduped;
+               }
+
+next_file:
+               mnt_drop_write_file(dst_file);
+next_loop:
+               fdput(dst_fd);
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(vfs_dedupe_file_range);