// SPDX-License-Identifier: GPL-2.0
/*
 * virtio-fs: Virtio Filesystem
 * Copyright (C) 2018 Red Hat, Inc.
 */

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_fs.h>
#include <linux/delay.h>
#include <linux/fs_context.h>
#include <linux/highmem.h>
#include "fuse_i.h"

/* List of virtio-fs device instances and a lock for the list. Also provides
 * mutual exclusion in device removal and mounting path
 */
static DEFINE_MUTEX(virtio_fs_mutex);
static LIST_HEAD(virtio_fs_instances);

enum {
        VQ_HIPRIO,
        VQ_REQUEST
};

/* Per-virtqueue state */
struct virtio_fs_vq {
        spinlock_t lock;
        struct virtqueue *vq;     /* protected by ->lock */
        struct work_struct done_work;
        struct list_head queued_reqs;
        struct delayed_work dispatch_work;
        struct fuse_dev *fud;
        bool connected;
        long in_flight;
        char name[24];
} ____cacheline_aligned_in_smp;

/* A virtio-fs device instance */
struct virtio_fs {
        struct kref refcount;
        struct list_head list;    /* on virtio_fs_instances */
        char *tag;
        struct virtio_fs_vq *vqs;
        unsigned int nvqs;               /* number of virtqueues */
        unsigned int num_request_queues; /* number of request queues */
};

struct virtio_fs_forget {
        struct fuse_in_header ih;
        struct fuse_forget_in arg;
        /* This request can be temporarily queued on the virtqueue */
        struct list_head list;
};

static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
{
        struct virtio_fs *fs = vq->vdev->priv;

        return &fs->vqs[vq->index];
}

static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq)
{
        return &vq_to_fsvq(vq)->fud->pq;
}

static void release_virtio_fs_obj(struct kref *ref)
{
        struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);

        kfree(vfs->vqs);
        kfree(vfs);
}

/* Make sure virtio_fs_mutex is held */
static void virtio_fs_put(struct virtio_fs *fs)
{
        kref_put(&fs->refcount, release_virtio_fs_obj);
}

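/* fuse_iqueue release: drop the input queue's reference on the virtio_fs instance */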
static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
{
        struct virtio_fs *vfs = fiq->priv;

        mutex_lock(&virtio_fs_mutex);
        virtio_fs_put(vfs);
        mutex_unlock(&virtio_fs_mutex);
}

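/* Wait until no requests are in flight on the virtqueue, then flush its work items */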
static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
{
        WARN_ON(fsvq->in_flight < 0);

        /* Wait for in-flight requests to finish. */
        while (1) {
                spin_lock(&fsvq->lock);
                if (!fsvq->in_flight) {
                        spin_unlock(&fsvq->lock);
                        break;
                }
                spin_unlock(&fsvq->lock);
                /* TODO use completion instead of timeout */
                usleep_range(1000, 2000);
        }

        flush_work(&fsvq->done_work);
        flush_delayed_work(&fsvq->dispatch_work);
}

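/* Free FORGET requests still waiting for space on the hiprio virtqueue */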
static inline void drain_hiprio_queued_reqs(struct virtio_fs_vq *fsvq)
{
        struct virtio_fs_forget *forget;

        spin_lock(&fsvq->lock);
        while (1) {
                forget = list_first_entry_or_null(&fsvq->queued_reqs,
                                                struct virtio_fs_forget, list);
                if (!forget)
                        break;
                list_del(&forget->list);
                kfree(forget);
        }
        spin_unlock(&fsvq->lock);
}

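/* Drain every virtqueue; queued hiprio FORGET requests are freed first */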
static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
{
        struct virtio_fs_vq *fsvq;
        int i;

        for (i = 0; i < fs->nvqs; i++) {
                fsvq = &fs->vqs[i];
                if (i == VQ_HIPRIO)
                        drain_hiprio_queued_reqs(fsvq);

                virtio_fs_drain_queue(fsvq);
        }
}

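/* Mark all virtqueues connected so that requests can be queued again */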
static void virtio_fs_start_all_queues(struct virtio_fs *fs)
{
        struct virtio_fs_vq *fsvq;
        int i;

        for (i = 0; i < fs->nvqs; i++) {
                fsvq = &fs->vqs[i];
                spin_lock(&fsvq->lock);
                fsvq->connected = true;
                spin_unlock(&fsvq->lock);
        }
}

/* Add a new instance to the list or return -EEXIST if tag name exists */
static int virtio_fs_add_instance(struct virtio_fs *fs)
{
        struct virtio_fs *fs2;
        bool duplicate = false;

        mutex_lock(&virtio_fs_mutex);

        list_for_each_entry(fs2, &virtio_fs_instances, list) {
                if (strcmp(fs->tag, fs2->tag) == 0)
                        duplicate = true;
        }

        if (!duplicate)
                list_add_tail(&fs->list, &virtio_fs_instances);

        mutex_unlock(&virtio_fs_mutex);

        if (duplicate)
                return -EEXIST;
        return 0;
}

/* Return the virtio_fs with a given tag, or NULL */
static struct virtio_fs *virtio_fs_find_instance(const char *tag)
{
        struct virtio_fs *fs;

        mutex_lock(&virtio_fs_mutex);

        list_for_each_entry(fs, &virtio_fs_instances, list) {
                if (strcmp(fs->tag, tag) == 0) {
                        kref_get(&fs->refcount);
                        goto found;
                }
        }

        fs = NULL; /* not found */

found:
        mutex_unlock(&virtio_fs_mutex);

        return fs;
}

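/* Free the fuse_dev attached to each virtqueue */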
static void virtio_fs_free_devs(struct virtio_fs *fs)
{
        unsigned int i;

        for (i = 0; i < fs->nvqs; i++) {
                struct virtio_fs_vq *fsvq = &fs->vqs[i];

                if (!fsvq->fud)
                        continue;

                fuse_dev_free(fsvq->fud);
                fsvq->fud = NULL;
        }
}

/* Read filesystem name from virtio config into fs->tag (devm-allocated, freed with the device) */
static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
{
        char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
        char *end;
        size_t len;

        virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
                           &tag_buf, sizeof(tag_buf));
        end = memchr(tag_buf, '\0', sizeof(tag_buf));
        if (end == tag_buf)
                return -EINVAL; /* empty tag */
        if (!end)
                end = &tag_buf[sizeof(tag_buf)];

        len = end - tag_buf;
        fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
        if (!fs->tag)
                return -ENOMEM;
        memcpy(fs->tag, tag_buf, len);
        fs->tag[len] = '\0';
        return 0;
}

/* Work function for hiprio completion */
static void virtio_fs_hiprio_done_work(struct work_struct *work)
{
        struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
                                                 done_work);
        struct virtqueue *vq = fsvq->vq;

        /* Free completed FUSE_FORGET requests */
        spin_lock(&fsvq->lock);
        do {
                unsigned int len;
                void *req;

                virtqueue_disable_cb(vq);

                while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
                        kfree(req);
                        fsvq->in_flight--;
                }
        } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
        spin_unlock(&fsvq->lock);
}

static void virtio_fs_dummy_dispatch_work(struct work_struct *work)
{
}

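/* Retry FORGET requests that could not be queued on the hiprio virtqueue earlier */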
static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
{
        struct virtio_fs_forget *forget;
        struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
                                                 dispatch_work.work);
        struct virtqueue *vq = fsvq->vq;
        struct scatterlist sg;
        struct scatterlist *sgs[] = {&sg};
        bool notify;
        int ret;

        pr_debug("virtio-fs: worker %s called.\n", __func__);
        while (1) {
                spin_lock(&fsvq->lock);
                forget = list_first_entry_or_null(&fsvq->queued_reqs,
                                        struct virtio_fs_forget, list);
                if (!forget) {
                        spin_unlock(&fsvq->lock);
                        return;
                }

                list_del(&forget->list);
                if (!fsvq->connected) {
                        spin_unlock(&fsvq->lock);
                        kfree(forget);
                        continue;
                }

                sg_init_one(&sg, forget, sizeof(*forget));

                /* Enqueue the request */
                dev_dbg(&vq->vdev->dev, "%s\n", __func__);
                ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
                if (ret < 0) {
                        if (ret == -ENOMEM || ret == -ENOSPC) {
                                pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
                                         ret);
                                list_add_tail(&forget->list,
                                                &fsvq->queued_reqs);
                                schedule_delayed_work(&fsvq->dispatch_work,
                                                msecs_to_jiffies(1));
                        } else {
                                pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
                                         ret);
                                kfree(forget);
                        }
                        spin_unlock(&fsvq->lock);
                        return;
                }

                fsvq->in_flight++;
                notify = virtqueue_kick_prepare(vq);
                spin_unlock(&fsvq->lock);

                if (notify)
                        virtqueue_notify(vq);
                pr_debug("virtio-fs: worker %s dispatched one forget request.\n",
                         __func__);
        }
}

/* Allocate and copy args into req->argbuf */
static int copy_args_to_argbuf(struct fuse_req *req)
{
        struct fuse_args *args = req->args;
        unsigned int offset = 0;
        unsigned int num_in;
        unsigned int num_out;
        unsigned int len;
        unsigned int i;

        num_in = args->in_numargs - args->in_pages;
        num_out = args->out_numargs - args->out_pages;
        len = fuse_len_args(num_in, (struct fuse_arg *)args->in_args) +
              fuse_len_args(num_out, args->out_args);

        req->argbuf = kmalloc(len, GFP_ATOMIC);
        if (!req->argbuf)
                return -ENOMEM;

        for (i = 0; i < num_in; i++) {
                memcpy(req->argbuf + offset,
                       args->in_args[i].value,
                       args->in_args[i].size);
                offset += args->in_args[i].size;
        }

        return 0;
}

/* Copy args out of and free req->argbuf */
static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
{
        unsigned int remaining;
        unsigned int offset;
        unsigned int num_in;
        unsigned int num_out;
        unsigned int i;

        remaining = req->out.h.len - sizeof(req->out.h);
        num_in = args->in_numargs - args->in_pages;
        num_out = args->out_numargs - args->out_pages;
        offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);

        for (i = 0; i < num_out; i++) {
                unsigned int argsize = args->out_args[i].size;

                if (args->out_argvar &&
                    i == args->out_numargs - 1 &&
                    argsize > remaining) {
                        argsize = remaining;
                }

                memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
                offset += argsize;

                if (i != args->out_numargs - 1)
                        remaining -= argsize;
        }

        /* Store the actual size of the variable-length arg */
        if (args->out_argvar)
                args->out_args[args->out_numargs - 1].size = remaining;

        kfree(req->argbuf);
        req->argbuf = NULL;
}

/* Work function for request completion */
static void virtio_fs_requests_done_work(struct work_struct *work)
{
        struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
                                                 done_work);
        struct fuse_pqueue *fpq = &fsvq->fud->pq;
        struct fuse_conn *fc = fsvq->fud->fc;
        struct virtqueue *vq = fsvq->vq;
        struct fuse_req *req;
        struct fuse_args_pages *ap;
        struct fuse_req *next;
        struct fuse_args *args;
        unsigned int len, i, thislen;
        struct page *page;
        LIST_HEAD(reqs);

        /* Collect completed requests off the virtqueue */
        spin_lock(&fsvq->lock);
        do {
                virtqueue_disable_cb(vq);

                while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
                        spin_lock(&fpq->lock);
                        list_move_tail(&req->list, &reqs);
                        spin_unlock(&fpq->lock);
                }
        } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
        spin_unlock(&fsvq->lock);

        /* End requests */
        list_for_each_entry_safe(req, next, &reqs, list) {
                /*
                 * TODO verify that server properly follows FUSE protocol
                 * (oh.uniq, oh.len)
                 */
                args = req->args;
                copy_args_from_argbuf(args, req);

                if (args->out_pages && args->page_zeroing) {
                        len = args->out_args[args->out_numargs - 1].size;
                        ap = container_of(args, typeof(*ap), args);
                        for (i = 0; i < ap->num_pages; i++) {
                                thislen = ap->descs[i].length;
                                if (len < thislen) {
                                        WARN_ON(ap->descs[i].offset);
                                        page = ap->pages[i];
                                        zero_user_segment(page, len, thislen);
                                        len = 0;
                                } else {
                                        len -= thislen;
                                }
                        }
                }

                spin_lock(&fpq->lock);
                clear_bit(FR_SENT, &req->flags);
                list_del_init(&req->list);
                spin_unlock(&fpq->lock);

                fuse_request_end(fc, req);
                spin_lock(&fsvq->lock);
                fsvq->in_flight--;
                spin_unlock(&fsvq->lock);
        }
}

/* Virtqueue interrupt handler */
static void virtio_fs_vq_done(struct virtqueue *vq)
{
        struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);

        dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);

        schedule_work(&fsvq->done_work);
}

/* Initialize virtqueues */
static int virtio_fs_setup_vqs(struct virtio_device *vdev,
                               struct virtio_fs *fs)
{
        struct virtqueue **vqs;
        vq_callback_t **callbacks;
        const char **names;
        unsigned int i;
        int ret = 0;

        virtio_cread(vdev, struct virtio_fs_config, num_request_queues,
                     &fs->num_request_queues);
        if (fs->num_request_queues == 0)
                return -EINVAL;

        fs->nvqs = 1 + fs->num_request_queues;
        fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
        if (!fs->vqs)
                return -ENOMEM;

        vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
        callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]),
                                        GFP_KERNEL);
        names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL);
        if (!vqs || !callbacks || !names) {
                ret = -ENOMEM;
                goto out;
        }

        callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
        snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name),
                        "hiprio");
        names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
        INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
        INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
        INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
                        virtio_fs_hiprio_dispatch_work);
        spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);

        /* Initialize the request virtqueues */
        for (i = VQ_REQUEST; i < fs->nvqs; i++) {
                spin_lock_init(&fs->vqs[i].lock);
                INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
                INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
                                        virtio_fs_dummy_dispatch_work);
                INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
                snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
                         "requests.%u", i - VQ_REQUEST);
                callbacks[i] = virtio_fs_vq_done;
                names[i] = fs->vqs[i].name;
        }

        ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL);
        if (ret < 0)
                goto out;

        for (i = 0; i < fs->nvqs; i++)
                fs->vqs[i].vq = vqs[i];

        virtio_fs_start_all_queues(fs);
out:
        kfree(names);
        kfree(callbacks);
        kfree(vqs);
        if (ret)
                kfree(fs->vqs);
        return ret;
}

/* Free virtqueues (device must already be reset) */
static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
                                  struct virtio_fs *fs)
{
        vdev->config->del_vqs(vdev);
}

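/* Probe a new virtio-fs device: read the tag, set up virtqueues and register the instance */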
static int virtio_fs_probe(struct virtio_device *vdev)
{
        struct virtio_fs *fs;
        int ret;

        fs = kzalloc(sizeof(*fs), GFP_KERNEL);
        if (!fs)
                return -ENOMEM;
        kref_init(&fs->refcount);
        vdev->priv = fs;

        ret = virtio_fs_read_tag(vdev, fs);
        if (ret < 0)
                goto out;

        ret = virtio_fs_setup_vqs(vdev, fs);
        if (ret < 0)
                goto out;

        /* TODO vq affinity */

        /* Bring the device online in case the filesystem is mounted and
         * requests need to be sent before we return.
         */
        virtio_device_ready(vdev);

        ret = virtio_fs_add_instance(fs);
        if (ret < 0)
                goto out_vqs;

        return 0;

out_vqs:
        vdev->config->reset(vdev);
        virtio_fs_cleanup_vqs(vdev, fs);

out:
        vdev->priv = NULL;
        kfree(fs);
        return ret;
}

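/* Mark all virtqueues disconnected so that no new requests are queued */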
static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
{
        struct virtio_fs_vq *fsvq;
        int i;

        for (i = 0; i < fs->nvqs; i++) {
                fsvq = &fs->vqs[i];
                spin_lock(&fsvq->lock);
                fsvq->connected = false;
                spin_unlock(&fsvq->lock);
        }
}

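/* Device removal: unlist the instance, stop and drain all queues, then drop the device's reference */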
static void virtio_fs_remove(struct virtio_device *vdev)
{
        struct virtio_fs *fs = vdev->priv;

        mutex_lock(&virtio_fs_mutex);
        /* This device is going away. No one should get a new reference */
        list_del_init(&fs->list);
        virtio_fs_stop_all_queues(fs);
        virtio_fs_drain_all_queues(fs);
        vdev->config->reset(vdev);
        virtio_fs_cleanup_vqs(vdev, fs);

        vdev->priv = NULL;
        /* Put device reference on virtio_fs object */
        virtio_fs_put(fs);
        mutex_unlock(&virtio_fs_mutex);
}

#ifdef CONFIG_PM_SLEEP
static int virtio_fs_freeze(struct virtio_device *vdev)
{
        /* TODO need to save state here */
        pr_warn("virtio-fs: suspend/resume not yet supported\n");
        return -EOPNOTSUPP;
}

static int virtio_fs_restore(struct virtio_device *vdev)
{
        /* TODO need to restore state here */
        return 0;
}
#endif /* CONFIG_PM_SLEEP */

static const struct virtio_device_id id_table[] = {
        { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
        {},
};

static const unsigned int feature_table[] = {};

static struct virtio_driver virtio_fs_driver = {
        .driver.name            = KBUILD_MODNAME,
        .driver.owner           = THIS_MODULE,
        .id_table               = id_table,
        .feature_table          = feature_table,
        .feature_table_size     = ARRAY_SIZE(feature_table),
        .probe                  = virtio_fs_probe,
        .remove                 = virtio_fs_remove,
#ifdef CONFIG_PM_SLEEP
        .freeze                 = virtio_fs_freeze,
        .restore                = virtio_fs_restore,
#endif
};

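/* Dequeue a forget request and send it on the hiprio virtqueue; on ENOMEM/ENOSPC it is requeued and retried from the dispatch worker */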
static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
        struct fuse_forget_link *link;
        struct virtio_fs_forget *forget;
        struct scatterlist sg;
        struct scatterlist *sgs[] = {&sg};
        struct virtio_fs *fs;
        struct virtqueue *vq;
        struct virtio_fs_vq *fsvq;
        bool notify;
        u64 unique;
        int ret;

        link = fuse_dequeue_forget(fiq, 1, NULL);
        unique = fuse_get_unique(fiq);

        fs = fiq->priv;
        fsvq = &fs->vqs[VQ_HIPRIO];
        spin_unlock(&fiq->lock);

        /* Allocate a buffer for the request */
        forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);

        forget->ih = (struct fuse_in_header){
                .opcode = FUSE_FORGET,
                .nodeid = link->forget_one.nodeid,
                .unique = unique,
                .len = sizeof(*forget),
        };
        forget->arg = (struct fuse_forget_in){
                .nlookup = link->forget_one.nlookup,
        };

        sg_init_one(&sg, forget, sizeof(*forget));

        /* Enqueue the request */
        spin_lock(&fsvq->lock);

        if (!fsvq->connected) {
                kfree(forget);
                spin_unlock(&fsvq->lock);
                goto out;
        }

        vq = fsvq->vq;
        dev_dbg(&vq->vdev->dev, "%s\n", __func__);

        ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
        if (ret < 0) {
                if (ret == -ENOMEM || ret == -ENOSPC) {
                        pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later.\n",
                                 ret);
                        list_add_tail(&forget->list, &fsvq->queued_reqs);
                        schedule_delayed_work(&fsvq->dispatch_work,
                                        msecs_to_jiffies(1));
                } else {
                        pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
                                 ret);
                        kfree(forget);
                }
                spin_unlock(&fsvq->lock);
                goto out;
        }

        fsvq->in_flight++;
        notify = virtqueue_kick_prepare(vq);

        spin_unlock(&fsvq->lock);

        if (notify)
                virtqueue_notify(vq);
out:
        kfree(link);
}

static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
        /*
         * TODO interrupts.
         *
         * Normal fs operations on local filesystems aren't interruptible.
         * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
         * with a shared lock between host and guest.
         */
        spin_unlock(&fiq->lock);
}

/* Return the number of scatter-gather list elements required */
static unsigned int sg_count_fuse_req(struct fuse_req *req)
{
        struct fuse_args *args = req->args;
        struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
        unsigned int total_sgs = 1 /* fuse_in_header */;

        if (args->in_numargs - args->in_pages)
                total_sgs += 1;

        if (args->in_pages)
                total_sgs += ap->num_pages;

        if (!test_bit(FR_ISREPLY, &req->flags))
                return total_sgs;

        total_sgs += 1 /* fuse_out_header */;

        if (args->out_numargs - args->out_pages)
                total_sgs += 1;

        if (args->out_pages)
                total_sgs += ap->num_pages;

        return total_sgs;
}

/* Add pages to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
                                       struct page **pages,
                                       struct fuse_page_desc *page_descs,
                                       unsigned int num_pages,
                                       unsigned int total_len)
{
        unsigned int i;
        unsigned int this_len;

        for (i = 0; i < num_pages && total_len; i++) {
                sg_init_table(&sg[i], 1);
                this_len = min(page_descs[i].length, total_len);
                sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
                total_len -= this_len;
        }

        return i;
}

/* Add args to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_args(struct scatterlist *sg,
                                      struct fuse_req *req,
                                      struct fuse_arg *args,
                                      unsigned int numargs,
                                      bool argpages,
                                      void *argbuf,
                                      unsigned int *len_used)
{
        struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
        unsigned int total_sgs = 0;
        unsigned int len;

        len = fuse_len_args(numargs - argpages, args);
        if (len)
                sg_init_one(&sg[total_sgs++], argbuf, len);

        if (argpages)
                total_sgs += sg_init_fuse_pages(&sg[total_sgs],
                                                ap->pages, ap->descs,
                                                ap->num_pages,
                                                args[numargs - 1].size);

        if (len_used)
                *len_used = len;

        return total_sgs;
}

/* Add a request to a virtqueue and kick the device */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
                                 struct fuse_req *req)
{
        /* requests need at least 4 elements */
        struct scatterlist *stack_sgs[6];
        struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
        struct scatterlist **sgs = stack_sgs;
        struct scatterlist *sg = stack_sg;
        struct virtqueue *vq;
        struct fuse_args *args = req->args;
        unsigned int argbuf_used = 0;
        unsigned int out_sgs = 0;
        unsigned int in_sgs = 0;
        unsigned int total_sgs;
        unsigned int i;
        int ret;
        bool notify;

        /* Does the sglist fit on the stack? */
        total_sgs = sg_count_fuse_req(req);
        if (total_sgs > ARRAY_SIZE(stack_sgs)) {
                sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
                sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
                if (!sgs || !sg) {
                        ret = -ENOMEM;
                        goto out;
                }
        }

        /* Use a bounce buffer since stack args cannot be mapped */
        ret = copy_args_to_argbuf(req);
        if (ret < 0)
                goto out;

        /* Request elements */
        sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
        out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
                                     (struct fuse_arg *)args->in_args,
                                     args->in_numargs, args->in_pages,
                                     req->argbuf, &argbuf_used);

        /* Reply elements */
        if (test_bit(FR_ISREPLY, &req->flags)) {
                sg_init_one(&sg[out_sgs + in_sgs++],
                            &req->out.h, sizeof(req->out.h));
                in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
                                            args->out_args, args->out_numargs,
                                            args->out_pages,
                                            req->argbuf + argbuf_used, NULL);
        }

        WARN_ON(out_sgs + in_sgs != total_sgs);

        for (i = 0; i < total_sgs; i++)
                sgs[i] = &sg[i];

        spin_lock(&fsvq->lock);

        if (!fsvq->connected) {
                spin_unlock(&fsvq->lock);
                ret = -ENOTCONN;
                goto out;
        }

        vq = fsvq->vq;
        ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
        if (ret < 0) {
                spin_unlock(&fsvq->lock);
                goto out;
        }

        fsvq->in_flight++;
        notify = virtqueue_kick_prepare(vq);

        spin_unlock(&fsvq->lock);

        if (notify)
                virtqueue_notify(vq);

out:
        if (ret < 0 && req->argbuf) {
                kfree(req->argbuf);
                req->argbuf = NULL;
        }
        if (sgs != stack_sgs) {
                kfree(sgs);
                kfree(sg);
        }

        return ret;
}

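/* Take the next pending fuse request off the input queue and submit it to a request virtqueue */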
static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
        unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
        struct virtio_fs *fs;
        struct fuse_conn *fc;
        struct fuse_req *req;
        struct fuse_pqueue *fpq;
        int ret;

        WARN_ON(list_empty(&fiq->pending));
        req = list_last_entry(&fiq->pending, struct fuse_req, list);
        clear_bit(FR_PENDING, &req->flags);
        list_del_init(&req->list);
        WARN_ON(!list_empty(&fiq->pending));
        spin_unlock(&fiq->lock);

        fs = fiq->priv;
        fc = fs->vqs[queue_id].fud->fc;

        pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
                 __func__, req->in.h.opcode, req->in.h.unique,
                 req->in.h.nodeid, req->in.h.len,
                 fuse_len_args(req->args->out_numargs, req->args->out_args));

        fpq = &fs->vqs[queue_id].fud->pq;
        spin_lock(&fpq->lock);
        if (!fpq->connected) {
                spin_unlock(&fpq->lock);
                req->out.h.error = -ENODEV;
                pr_err("virtio-fs: %s disconnected\n", __func__);
                fuse_request_end(fc, req);
                return;
        }
        list_add_tail(&req->list, fpq->processing);
        spin_unlock(&fpq->lock);
        set_bit(FR_SENT, &req->flags);
        /* matches barrier in request_wait_answer() */
        smp_mb__after_atomic();

retry:
        ret = virtio_fs_enqueue_req(&fs->vqs[queue_id], req);
        if (ret < 0) {
                if (ret == -ENOMEM || ret == -ENOSPC) {
                        /* Virtqueue full. Retry submission */
                        /* TODO use completion instead of timeout */
                        usleep_range(20, 30);
                        goto retry;
                }
                req->out.h.error = ret;
                pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);
                spin_lock(&fpq->lock);
                clear_bit(FR_SENT, &req->flags);
                list_del_init(&req->list);
                spin_unlock(&fpq->lock);
                fuse_request_end(fc, req);
                return;
        }
}

static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
        .wake_forget_and_unlock         = virtio_fs_wake_forget_and_unlock,
        .wake_interrupt_and_unlock      = virtio_fs_wake_interrupt_and_unlock,
        .wake_pending_and_unlock        = virtio_fs_wake_pending_and_unlock,
        .release                        = virtio_fs_fiq_release,
};

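/* Fill in the superblock: allocate per-queue fuse devices and set up the fuse connection */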
static int virtio_fs_fill_super(struct super_block *sb)
{
        struct fuse_conn *fc = get_fuse_conn_super(sb);
        struct virtio_fs *fs = fc->iq.priv;
        unsigned int i;
        int err;
        struct fuse_fs_context ctx = {
                .rootmode = S_IFDIR,
                .default_permissions = 1,
                .allow_other = 1,
                .max_read = UINT_MAX,
                .blksize = 512,
                .destroy = true,
                .no_control = true,
                .no_force_umount = true,
        };

        mutex_lock(&virtio_fs_mutex);

        /* After taking the mutex, make sure the virtiofs device is still
         * there. Though we are holding a reference to it, the driver's
         * ->remove might still have cleaned up the virtqueues. In that
         * case bail out.
         */
        err = -EINVAL;
        if (list_empty(&fs->list)) {
                pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
                goto err;
        }

        err = -ENOMEM;
        /* Allocate fuse_dev for hiprio and notification queues */
        for (i = 0; i < VQ_REQUEST; i++) {
                struct virtio_fs_vq *fsvq = &fs->vqs[i];

                fsvq->fud = fuse_dev_alloc();
                if (!fsvq->fud)
                        goto err_free_fuse_devs;
        }

        ctx.fudptr = (void **)&fs->vqs[VQ_REQUEST].fud;
        err = fuse_fill_super_common(sb, &ctx);
        if (err < 0)
                goto err_free_fuse_devs;

        fc = fs->vqs[VQ_REQUEST].fud->fc;

        for (i = 0; i < fs->nvqs; i++) {
                struct virtio_fs_vq *fsvq = &fs->vqs[i];

                if (i == VQ_REQUEST)
                        continue; /* already initialized */
                fuse_dev_install(fsvq->fud, fc);
        }

        /* Previous unmount will stop all queues. Start these again */
        virtio_fs_start_all_queues(fs);
        fuse_send_init(fc);
        mutex_unlock(&virtio_fs_mutex);
        return 0;

err_free_fuse_devs:
        virtio_fs_free_devs(fs);
err:
        mutex_unlock(&virtio_fs_mutex);
        return err;
}

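/* Kill the superblock: drain the queues around fuse_kill_sb_anon() and free the fuse devices */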
static void virtio_kill_sb(struct super_block *sb)
{
        struct fuse_conn *fc = get_fuse_conn_super(sb);
        struct virtio_fs *vfs;
        struct virtio_fs_vq *fsvq;

        /* If mount failed, we can still be called without any fc */
        if (!fc)
                return fuse_kill_sb_anon(sb);

        vfs = fc->iq.priv;
        fsvq = &vfs->vqs[VQ_HIPRIO];

        /* Stop forget queue. Soon destroy will be sent */
        spin_lock(&fsvq->lock);
        fsvq->connected = false;
        spin_unlock(&fsvq->lock);
        virtio_fs_drain_all_queues(vfs);

        fuse_kill_sb_anon(sb);

        /* fuse_kill_sb_anon() must have sent destroy. Stop all queues
         * and drain one more time and free fuse devices. Freeing fuse
         * devices will drop their reference on fuse_conn and that in
         * turn will drop its reference on virtio_fs object.
         */
        virtio_fs_stop_all_queues(vfs);
        virtio_fs_drain_all_queues(vfs);
        virtio_fs_free_devs(vfs);
}

static int virtio_fs_test_super(struct super_block *sb,
                                struct fs_context *fsc)
{
        struct fuse_conn *fc = fsc->s_fs_info;

        return fc->iq.priv == get_fuse_conn_super(sb)->iq.priv;
}

static int virtio_fs_set_super(struct super_block *sb,
                               struct fs_context *fsc)
{
        int err;

        err = get_anon_bdev(&sb->s_dev);
        if (!err)
                fuse_conn_get(fsc->s_fs_info);

        return err;
}

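/* Look up the instance by tag, then get or create the superblock and graft it into the mount */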
static int virtio_fs_get_tree(struct fs_context *fsc)
{
        struct virtio_fs *fs;
        struct super_block *sb;
        struct fuse_conn *fc;
        int err;

        /* This gets a reference on virtio_fs object. This ptr gets installed
         * in fc->iq.priv. Once fuse_conn is going away, it calls ->put()
         * to drop the reference to this object.
         */
        fs = virtio_fs_find_instance(fsc->source);
        if (!fs) {
                pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
                return -EINVAL;
        }

        fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
        if (!fc) {
                mutex_lock(&virtio_fs_mutex);
                virtio_fs_put(fs);
                mutex_unlock(&virtio_fs_mutex);
                return -ENOMEM;
        }

        fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_fs_fiq_ops,
                       fs);
        fc->release = fuse_free_conn;
        fc->delete_stale = true;

        fsc->s_fs_info = fc;
        sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super);
        fuse_conn_put(fc);
        if (IS_ERR(sb))
                return PTR_ERR(sb);

        if (!sb->s_root) {
                err = virtio_fs_fill_super(sb);
                if (err) {
                        deactivate_locked_super(sb);
                        return err;
                }

                sb->s_flags |= SB_ACTIVE;
        }

        WARN_ON(fsc->root);
        fsc->root = dget(sb->s_root);
        return 0;
}

static const struct fs_context_operations virtio_fs_context_ops = {
        .get_tree       = virtio_fs_get_tree,
};

static int virtio_fs_init_fs_context(struct fs_context *fsc)
{
        fsc->ops = &virtio_fs_context_ops;
        return 0;
}

static struct file_system_type virtio_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "virtiofs",
        .init_fs_context = virtio_fs_init_fs_context,
        .kill_sb        = virtio_kill_sb,
};

static int __init virtio_fs_init(void)
{
        int ret;

        ret = register_virtio_driver(&virtio_fs_driver);
        if (ret < 0)
                return ret;

        ret = register_filesystem(&virtio_fs_type);
        if (ret < 0) {
                unregister_virtio_driver(&virtio_fs_driver);
                return ret;
        }

        return 0;
}
module_init(virtio_fs_init);

static void __exit virtio_fs_exit(void)
{
        unregister_filesystem(&virtio_fs_type);
        unregister_virtio_driver(&virtio_fs_driver);
}
module_exit(virtio_fs_exit);

MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
MODULE_DESCRIPTION("Virtio Filesystem");
MODULE_LICENSE("GPL");
MODULE_ALIAS_FS(KBUILD_MODNAME);
MODULE_DEVICE_TABLE(virtio, id_table);