X-Git-Url: http://git.monstr.eu/?a=blobdiff_plain;f=fs%2Fuserfaultfd.c;h=003f0d31743eb3cd95de8c8da21a4d0d731daf73;hb=2c3ef25c4a60cc18cf3f05a74c78220748f25684;hp=f6e0f0c0d0e577f0be543d9bb3c3bc2cce6891fa;hpb=0d0a19395baa36ab186df8081ab7f7b57c3fade1;p=linux-2.6-microblaze.git diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index f6e0f0c0d0e5..003f0d31743e 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -33,11 +33,6 @@ int sysctl_unprivileged_userfaultfd __read_mostly; static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly; -enum userfaultfd_state { - UFFD_STATE_WAIT_API, - UFFD_STATE_RUNNING, -}; - /* * Start with fault_pending_wqh and fault_wqh so they're more likely * to be in the same cacheline. @@ -69,12 +64,10 @@ struct userfaultfd_ctx { unsigned int flags; /* features requested from the userspace */ unsigned int features; - /* state machine */ - enum userfaultfd_state state; /* released */ bool released; /* memory mappings are changing because of non-cooperative event */ - bool mmap_changing; + atomic_t mmap_changing; /* mm with one ore more vmas attached to this userfaultfd_ctx */ struct mm_struct *mm; }; @@ -104,6 +97,14 @@ struct userfaultfd_wake_range { unsigned long len; }; +/* internal indication that UFFD_API ioctl was successfully executed */ +#define UFFD_FEATURE_INITIALIZED (1u << 31) + +static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx) +{ + return ctx->features & UFFD_FEATURE_INITIALIZED; +} + static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode, int wake_flags, void *key) { @@ -623,7 +624,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, * already released. */ out: - WRITE_ONCE(ctx->mmap_changing, false); + atomic_dec(&ctx->mmap_changing); + VM_BUG_ON(atomic_read(&ctx->mmap_changing) < 0); userfaultfd_ctx_put(ctx); } @@ -666,15 +668,14 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) refcount_set(&ctx->refcount, 1); ctx->flags = octx->flags; - ctx->state = UFFD_STATE_RUNNING; ctx->features = octx->features; ctx->released = false; - ctx->mmap_changing = false; + atomic_set(&ctx->mmap_changing, 0); ctx->mm = vma->vm_mm; mmgrab(ctx->mm); userfaultfd_ctx_get(octx); - WRITE_ONCE(octx->mmap_changing, true); + atomic_inc(&octx->mmap_changing); fctx->orig = octx; fctx->new = ctx; list_add_tail(&fctx->list, fcs); @@ -721,7 +722,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma, if (ctx->features & UFFD_FEATURE_EVENT_REMAP) { vm_ctx->ctx = ctx; userfaultfd_ctx_get(ctx); - WRITE_ONCE(ctx->mmap_changing, true); + atomic_inc(&ctx->mmap_changing); } else { /* Drop uffd context if remap feature not enabled */ vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; @@ -766,7 +767,7 @@ bool userfaultfd_remove(struct vm_area_struct *vma, return true; userfaultfd_ctx_get(ctx); - WRITE_ONCE(ctx->mmap_changing, true); + atomic_inc(&ctx->mmap_changing); mmap_read_unlock(mm); msg_init(&ewq.msg); @@ -810,7 +811,7 @@ int userfaultfd_unmap_prep(struct vm_area_struct *vma, return -ENOMEM; userfaultfd_ctx_get(ctx); - WRITE_ONCE(ctx->mmap_changing, true); + atomic_inc(&ctx->mmap_changing); unmap_ctx->ctx = ctx; unmap_ctx->start = start; unmap_ctx->end = end; @@ -943,38 +944,33 @@ static __poll_t userfaultfd_poll(struct file *file, poll_table *wait) poll_wait(file, &ctx->fd_wqh, wait); - switch (ctx->state) { - case UFFD_STATE_WAIT_API: + if (!userfaultfd_is_initialized(ctx)) return EPOLLERR; - case UFFD_STATE_RUNNING: - /* - * poll() never guarantees that read won't block. - * userfaults can be waken before they're read(). - */ - if (unlikely(!(file->f_flags & O_NONBLOCK))) - return EPOLLERR; - /* - * lockless access to see if there are pending faults - * __pollwait last action is the add_wait_queue but - * the spin_unlock would allow the waitqueue_active to - * pass above the actual list_add inside - * add_wait_queue critical section. So use a full - * memory barrier to serialize the list_add write of - * add_wait_queue() with the waitqueue_active read - * below. - */ - ret = 0; - smp_mb(); - if (waitqueue_active(&ctx->fault_pending_wqh)) - ret = EPOLLIN; - else if (waitqueue_active(&ctx->event_wqh)) - ret = EPOLLIN; - return ret; - default: - WARN_ON_ONCE(1); + /* + * poll() never guarantees that read won't block. + * userfaults can be waken before they're read(). + */ + if (unlikely(!(file->f_flags & O_NONBLOCK))) return EPOLLERR; - } + /* + * lockless access to see if there are pending faults + * __pollwait last action is the add_wait_queue but + * the spin_unlock would allow the waitqueue_active to + * pass above the actual list_add inside + * add_wait_queue critical section. So use a full + * memory barrier to serialize the list_add write of + * add_wait_queue() with the waitqueue_active read + * below. + */ + ret = 0; + smp_mb(); + if (waitqueue_active(&ctx->fault_pending_wqh)) + ret = EPOLLIN; + else if (waitqueue_active(&ctx->event_wqh)) + ret = EPOLLIN; + + return ret; } static const struct file_operations userfaultfd_fops; @@ -1169,7 +1165,7 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf, int no_wait = file->f_flags & O_NONBLOCK; struct inode *inode = file_inode(file); - if (ctx->state == UFFD_STATE_WAIT_API) + if (!userfaultfd_is_initialized(ctx)) return -EINVAL; for (;;) { @@ -1236,23 +1232,21 @@ static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx, } static __always_inline int validate_range(struct mm_struct *mm, - __u64 *start, __u64 len) + __u64 start, __u64 len) { __u64 task_size = mm->task_size; - *start = untagged_addr(*start); - - if (*start & ~PAGE_MASK) + if (start & ~PAGE_MASK) return -EINVAL; if (len & ~PAGE_MASK) return -EINVAL; if (!len) return -EINVAL; - if (*start < mmap_min_addr) + if (start < mmap_min_addr) return -EINVAL; - if (*start >= task_size) + if (start >= task_size) return -EINVAL; - if (len > task_size - *start) + if (len > task_size - start) return -EINVAL; return 0; } @@ -1316,7 +1310,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, vm_flags |= VM_UFFD_MINOR; } - ret = validate_range(mm, &uffdio_register.range.start, + ret = validate_range(mm, uffdio_register.range.start, uffdio_register.range.len); if (ret) goto out; @@ -1522,7 +1516,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister))) goto out; - ret = validate_range(mm, &uffdio_unregister.start, + ret = validate_range(mm, uffdio_unregister.start, uffdio_unregister.len); if (ret) goto out; @@ -1671,7 +1665,7 @@ static int userfaultfd_wake(struct userfaultfd_ctx *ctx, if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake))) goto out; - ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len); + ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len); if (ret) goto out; @@ -1702,7 +1696,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx, user_uffdio_copy = (struct uffdio_copy __user *) arg; ret = -EAGAIN; - if (READ_ONCE(ctx->mmap_changing)) + if (atomic_read(&ctx->mmap_changing)) goto out; ret = -EFAULT; @@ -1711,7 +1705,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx, sizeof(uffdio_copy)-sizeof(__s64))) goto out; - ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len); + ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len); if (ret) goto out; /* @@ -1759,7 +1753,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg; ret = -EAGAIN; - if (READ_ONCE(ctx->mmap_changing)) + if (atomic_read(&ctx->mmap_changing)) goto out; ret = -EFAULT; @@ -1768,7 +1762,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, sizeof(uffdio_zeropage)-sizeof(__s64))) goto out; - ret = validate_range(ctx->mm, &uffdio_zeropage.range.start, + ret = validate_range(ctx->mm, uffdio_zeropage.range.start, uffdio_zeropage.range.len); if (ret) goto out; @@ -1809,7 +1803,7 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx, struct userfaultfd_wake_range range; bool mode_wp, mode_dontwake; - if (READ_ONCE(ctx->mmap_changing)) + if (atomic_read(&ctx->mmap_changing)) return -EAGAIN; user_uffdio_wp = (struct uffdio_writeprotect __user *) arg; @@ -1818,7 +1812,7 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx, sizeof(struct uffdio_writeprotect))) return -EFAULT; - ret = validate_range(ctx->mm, &uffdio_wp.range.start, + ret = validate_range(ctx->mm, uffdio_wp.range.start, uffdio_wp.range.len); if (ret) return ret; @@ -1857,7 +1851,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg) user_uffdio_continue = (struct uffdio_continue __user *)arg; ret = -EAGAIN; - if (READ_ONCE(ctx->mmap_changing)) + if (atomic_read(&ctx->mmap_changing)) goto out; ret = -EFAULT; @@ -1866,7 +1860,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg) sizeof(uffdio_continue) - (sizeof(__s64)))) goto out; - ret = validate_range(ctx->mm, &uffdio_continue.range.start, + ret = validate_range(ctx->mm, uffdio_continue.range.start, uffdio_continue.range.len); if (ret) goto out; @@ -1910,9 +1904,10 @@ out: static inline unsigned int uffd_ctx_features(__u64 user_features) { /* - * For the current set of features the bits just coincide + * For the current set of features the bits just coincide. Set + * UFFD_FEATURE_INITIALIZED to mark the features as enabled. */ - return (unsigned int)user_features; + return (unsigned int)user_features | UFFD_FEATURE_INITIALIZED; } /* @@ -1925,12 +1920,10 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, { struct uffdio_api uffdio_api; void __user *buf = (void __user *)arg; + unsigned int ctx_features; int ret; __u64 features; - ret = -EINVAL; - if (ctx->state != UFFD_STATE_WAIT_API) - goto out; ret = -EFAULT; if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api))) goto out; @@ -1954,9 +1947,13 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, ret = -EFAULT; if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api))) goto out; - ctx->state = UFFD_STATE_RUNNING; + /* only enable the requested features for this uffd context */ - ctx->features = uffd_ctx_features(features); + ctx_features = uffd_ctx_features(features); + ret = -EINVAL; + if (cmpxchg(&ctx->features, 0, ctx_features) != 0) + goto err_out; + ret = 0; out: return ret; @@ -1973,7 +1970,7 @@ static long userfaultfd_ioctl(struct file *file, unsigned cmd, int ret = -EINVAL; struct userfaultfd_ctx *ctx = file->private_data; - if (cmd != UFFDIO_API && ctx->state == UFFD_STATE_WAIT_API) + if (cmd != UFFDIO_API && !userfaultfd_is_initialized(ctx)) return -EINVAL; switch(cmd) { @@ -2087,9 +2084,8 @@ SYSCALL_DEFINE1(userfaultfd, int, flags) refcount_set(&ctx->refcount, 1); ctx->flags = flags; ctx->features = 0; - ctx->state = UFFD_STATE_WAIT_API; ctx->released = false; - ctx->mmap_changing = false; + atomic_set(&ctx->mmap_changing, 0); ctx->mm = current->mm; /* prevent the mm struct to be freed */ mmgrab(ctx->mm);