virtiofs: schedule blocking async replies in separate worker
author     Vivek Goyal <vgoyal@redhat.com>
           Mon, 20 Apr 2020 15:01:34 +0000 (17:01 +0200)
committer  Miklos Szeredi <mszeredi@redhat.com>
           Mon, 20 Apr 2020 15:01:34 +0000 (17:01 +0200)
In virtiofs (unlike in regular fuse) processing of async replies is
serialized.  This can result in a deadlock in rare corner cases when
there's a circular dependency between the completion of two or more async
replies.

Such a deadlock can be reproduced with xfstests:generic/503 if TEST_DIR ==
SCRATCH_MNT (which is a misconfiguration):

 - Process A is waiting for page lock in worker thread context and blocked
   (virtio_fs_requests_done_work()).
 - Process B is holding page lock and waiting for pending writes to
   finish (fuse_wait_on_page_writeback()).
 - Write requests are waiting in virtqueue and can't complete because
   worker thread is blocked on page lock (process A).

Fix this by creating a unique work_struct for each async reply that can
block (O_DIRECT read).
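
For illustration, the shape of the fix is the standard one-work-item-per-request
pattern: instead of completing every reply inline from the single serialized
work function, a completion that may block is given its own work_struct and run
on the system workqueue.  A minimal sketch of that pattern follows; the names
reply_work, reply_work_fn, dispatch_blocking_reply and handle_reply() are
hypothetical stand-ins, not the code from this patch:

    #include <linux/workqueue.h>
    #include <linux/slab.h>

    struct reply_work {
            struct fuse_req *req;            /* reply being completed */
            struct work_struct work;
    };

    static void reply_work_fn(struct work_struct *work)
    {
            struct reply_work *w = container_of(work, struct reply_work, work);

            /* Hypothetical completion path; may block, e.g. on a page lock,
             * without stalling completion of other replies. */
            handle_reply(w->req);
            kfree(w);
    }

    static void dispatch_blocking_reply(struct fuse_req *req)
    {
            struct reply_work *w;

            /* __GFP_NOFAIL: a completed request must never be dropped */
            w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL);
            w->req = req;
            INIT_WORK(&w->work, reply_work_fn);
            schedule_work(&w->work);         /* runs in its own worker */
    }

A blocked completion then only stalls its own worker, so write replies queued
behind it can still be processed from the virtqueue completion handler.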

Fixes: a62a8ef9d97d ("virtio-fs: add virtiofs filesystem")
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
fs/fuse/file.c
fs/fuse/fuse_i.h
fs/fuse/virtio_fs.c

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9d67b83..d400b71 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -712,6 +712,7 @@ static ssize_t fuse_async_req_send(struct fuse_conn *fc,
        spin_unlock(&io->lock);
 
        ia->ap.args.end = fuse_aio_complete_req;
+       ia->ap.args.may_block = io->should_dirty;
        err = fuse_simple_background(fc, &ia->ap.args, GFP_KERNEL);
        if (err)
                fuse_aio_complete_req(fc, &ia->ap.args, err);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ca344bf..d7cde21 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -249,6 +249,7 @@ struct fuse_args {
        bool out_argvar:1;
        bool page_zeroing:1;
        bool page_replace:1;
+       bool may_block:1;
        struct fuse_in_arg in_args[3];
        struct fuse_arg out_args[2];
        void (*end)(struct fuse_conn *fc, struct fuse_args *args, int error);
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index bade747..0c6ef5d 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -60,6 +60,12 @@ struct virtio_fs_forget {
        struct virtio_fs_forget_req req;
 };
 
+struct virtio_fs_req_work {
+       struct fuse_req *req;
+       struct virtio_fs_vq *fsvq;
+       struct work_struct done_work;
+};
+
 static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
                                 struct fuse_req *req, bool in_flight);
 
@@ -485,19 +491,67 @@ static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
 }
 
 /* Work function for request completion */
+static void virtio_fs_request_complete(struct fuse_req *req,
+                                      struct virtio_fs_vq *fsvq)
+{
+       struct fuse_pqueue *fpq = &fsvq->fud->pq;
+       struct fuse_conn *fc = fsvq->fud->fc;
+       struct fuse_args *args;
+       struct fuse_args_pages *ap;
+       unsigned int len, i, thislen;
+       struct page *page;
+
+       /*
+        * TODO verify that server properly follows FUSE protocol
+        * (oh.uniq, oh.len)
+        */
+       args = req->args;
+       copy_args_from_argbuf(args, req);
+
+       if (args->out_pages && args->page_zeroing) {
+               len = args->out_args[args->out_numargs - 1].size;
+               ap = container_of(args, typeof(*ap), args);
+               for (i = 0; i < ap->num_pages; i++) {
+                       thislen = ap->descs[i].length;
+                       if (len < thislen) {
+                               WARN_ON(ap->descs[i].offset);
+                               page = ap->pages[i];
+                               zero_user_segment(page, len, thislen);
+                               len = 0;
+                       } else {
+                               len -= thislen;
+                       }
+               }
+       }
+
+       spin_lock(&fpq->lock);
+       clear_bit(FR_SENT, &req->flags);
+       spin_unlock(&fpq->lock);
+
+       fuse_request_end(fc, req);
+       spin_lock(&fsvq->lock);
+       dec_in_flight_req(fsvq);
+       spin_unlock(&fsvq->lock);
+}
+
+static void virtio_fs_complete_req_work(struct work_struct *work)
+{
+       struct virtio_fs_req_work *w =
+               container_of(work, typeof(*w), done_work);
+
+       virtio_fs_request_complete(w->req, w->fsvq);
+       kfree(w);
+}
+
 static void virtio_fs_requests_done_work(struct work_struct *work)
 {
        struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
                                                 done_work);
        struct fuse_pqueue *fpq = &fsvq->fud->pq;
-       struct fuse_conn *fc = fsvq->fud->fc;
        struct virtqueue *vq = fsvq->vq;
        struct fuse_req *req;
-       struct fuse_args_pages *ap;
        struct fuse_req *next;
-       struct fuse_args *args;
-       unsigned int len, i, thislen;
-       struct page *page;
+       unsigned int len;
        LIST_HEAD(reqs);
 
        /* Collect completed requests off the virtqueue */
@@ -515,38 +569,20 @@ static void virtio_fs_requests_done_work(struct work_struct *work)
 
        /* End requests */
        list_for_each_entry_safe(req, next, &reqs, list) {
-               /*
-                * TODO verify that server properly follows FUSE protocol
-                * (oh.uniq, oh.len)
-                */
-               args = req->args;
-               copy_args_from_argbuf(args, req);
-
-               if (args->out_pages && args->page_zeroing) {
-                       len = args->out_args[args->out_numargs - 1].size;
-                       ap = container_of(args, typeof(*ap), args);
-                       for (i = 0; i < ap->num_pages; i++) {
-                               thislen = ap->descs[i].length;
-                               if (len < thislen) {
-                                       WARN_ON(ap->descs[i].offset);
-                                       page = ap->pages[i];
-                                       zero_user_segment(page, len, thislen);
-                                       len = 0;
-                               } else {
-                                       len -= thislen;
-                               }
-                       }
-               }
-
-               spin_lock(&fpq->lock);
-               clear_bit(FR_SENT, &req->flags);
                list_del_init(&req->list);
-               spin_unlock(&fpq->lock);
 
-               fuse_request_end(fc, req);
-               spin_lock(&fsvq->lock);
-               dec_in_flight_req(fsvq);
-               spin_unlock(&fsvq->lock);
+               /* blocking async request completes in a worker context */
+               if (req->args->may_block) {
+                       struct virtio_fs_req_work *w;
+
+                       w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL);
+                       INIT_WORK(&w->done_work, virtio_fs_complete_req_work);
+                       w->fsvq = fsvq;
+                       w->req = req;
+                       schedule_work(&w->done_work);
+               } else {
+                       virtio_fs_request_complete(req, fsvq);
+               }
        }
 }