Merge branch 'misc.namei' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[linux-2.6-microblaze.git] / arch / um / drivers / virtio_uml.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Virtio vhost-user driver
4  *
5  * Copyright(c) 2019 Intel Corporation
6  *
7  * This driver allows virtio devices to be used over a vhost-user socket.
8  *
9  * Guest devices can be instantiated by kernel module or command line
10  * parameters. One device will be created for each parameter. Syntax:
11  *
12  *              virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]
13  * where:
14  *              <socket>        := vhost-user socket path to connect
15  *              <virtio_id>     := virtio device id (as in virtio_ids.h)
16  *              <platform_id>   := (optional) platform device id
17  *
18  * example:
19  *              virtio_uml.device=/var/uml.socket:1
20  *
21  * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
22  */
23 #include <linux/module.h>
24 #include <linux/platform_device.h>
25 #include <linux/slab.h>
26 #include <linux/virtio.h>
27 #include <linux/virtio_config.h>
28 #include <linux/virtio_ring.h>
29 #include <linux/time-internal.h>
30 #include <linux/virtio-uml.h>
31 #include <shared/as-layout.h>
32 #include <irq_kern.h>
33 #include <init.h>
34 #include <os.h>
35 #include "vhost_user.h"
36
/* NOTE(review): presumably caps the vring size; its user is outside this excerpt. */
#define MAX_SUPPORTED_QUEUE_SIZE        256

/* Recover the enclosing virtio_uml_device from a pointer to its vdev member. */
#define to_virtio_uml_device(ptr) \
        container_of(ptr, struct virtio_uml_device, vdev)
41
42 struct virtio_uml_platform_data {
43         u32 virtio_device_id;
44         const char *socket_path;
45         struct work_struct conn_broken_wk;
46         struct platform_device *pdev;
47 };
48
49 struct virtio_uml_device {
50         struct virtio_device vdev;
51         struct platform_device *pdev;
52
53         spinlock_t sock_lock;
54         int sock, req_fd, irq;
55         u64 features;
56         u64 protocol_features;
57         u8 status;
58         u8 registered:1;
59         u8 suspended:1;
60         u8 no_vq_suspend:1;
61
62         u8 config_changed_irq:1;
63         uint64_t vq_irq_vq_map;
64 };
65
/* Per-virtqueue driver data, stored in vq->priv. */
struct virtio_uml_vq_info {
        /* written by vu_notify(); negative selects the in-band VRING_KICK message */
        int kick_fd;
        /* read by vu_interrupt(); -1 when calls are delivered in-band */
        int call_fd;
        /* name handed to um_request_irq() for the call fd */
        char name[32];
        /* vu_notify() does not kick the device while this is set */
        bool suspended;
};
71
/* Defined outside this file; consumed by vhost_user_set_mem_table(). */
extern unsigned long long physmem_size;
extern unsigned long long highmem;

/* dev_err() on the platform device behind a struct virtio_uml_device. */
#define vu_err(vu, ...)         dev_err(&(vu)->pdev->dev, ##__VA_ARGS__)
75
76 /* Vhost-user protocol */
77
/*
 * Send all @len bytes of @buf on @fd, retrying after short writes and
 * -EINTR. The descriptors in @fds are passed along only until the first
 * chunk of data has gone out.
 *
 * Returns 0 on success or the negative error from os_sendmsg_fds().
 */
static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
                            const int *fds, unsigned int fds_num)
{
        int sent;

        for (;;) {
                sent = os_sendmsg_fds(fd, buf, len, fds, fds_num);
                if (sent > 0) {
                        buf += sent;
                        len -= sent;
                        /* the fds went out with this chunk, don't repeat them */
                        fds = NULL;
                        fds_num = 0;
                }

                if (!len)
                        break;
                if (sent < 0 && sent != -EINTR)
                        break;
        }

        return sent < 0 ? sent : 0;
}
97
/*
 * Read exactly @len bytes from @fd into @buf.
 *
 * Short reads and -EINTR are always retried. -EAGAIN is retried only when
 * @abortable is false; otherwise it is returned to the caller.
 *
 * Returns 0 on success (or when @len is 0), -ECONNRESET if the read
 * returned 0 before everything arrived, or the negative read error.
 */
static int full_read(int fd, void *buf, int len, bool abortable)
{
        int got;

        if (len == 0)
                return 0;

        for (;;) {
                got = os_read_file(fd, buf, len);
                if (got > 0) {
                        buf += got;
                        len -= got;
                }

                if (!len)
                        break;
                if (got > 0 || got == -EINTR)
                        continue;
                if (!abortable && got == -EAGAIN)
                        continue;
                break;
        }

        if (got < 0)
                return got;

        return got == 0 ? -ECONNRESET : 0;
}
119
120 static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
121 {
122         return full_read(fd, msg, sizeof(msg->header), true);
123 }
124
125 static int vhost_user_recv(struct virtio_uml_device *vu_dev,
126                            int fd, struct vhost_user_msg *msg,
127                            size_t max_payload_size, bool wait)
128 {
129         size_t size;
130         int rc;
131
132         /*
133          * In virtio time-travel mode, we're handling all the vhost-user
134          * FDs by polling them whenever appropriate. However, we may get
135          * into a situation where we're sending out an interrupt message
136          * to a device (e.g. a net device) and need to handle a simulation
137          * time message while doing so, e.g. one that tells us to update
138          * our idea of how long we can run without scheduling.
139          *
140          * Thus, we need to not just read() from the given fd, but need
141          * to also handle messages for the simulation time - this function
142          * does that for us while waiting for the given fd to be readable.
143          */
144         if (wait)
145                 time_travel_wait_readable(fd);
146
147         rc = vhost_user_recv_header(fd, msg);
148
149         if (rc == -ECONNRESET && vu_dev->registered) {
150                 struct virtio_uml_platform_data *pdata;
151
152                 pdata = vu_dev->pdev->dev.platform_data;
153
154                 virtio_break_device(&vu_dev->vdev);
155                 schedule_work(&pdata->conn_broken_wk);
156         }
157         if (rc)
158                 return rc;
159         size = msg->header.size;
160         if (size > max_payload_size)
161                 return -EPROTO;
162         return full_read(fd, &msg->payload, size, false);
163 }
164
165 static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
166                                 struct vhost_user_msg *msg,
167                                 size_t max_payload_size)
168 {
169         int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg,
170                                  max_payload_size, true);
171
172         if (rc)
173                 return rc;
174
175         if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
176                 return -EPROTO;
177
178         return 0;
179 }
180
181 static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
182                                u64 *value)
183 {
184         struct vhost_user_msg msg;
185         int rc = vhost_user_recv_resp(vu_dev, &msg,
186                                       sizeof(msg.payload.integer));
187
188         if (rc)
189                 return rc;
190         if (msg.header.size != sizeof(msg.payload.integer))
191                 return -EPROTO;
192         *value = msg.payload.integer;
193         return 0;
194 }
195
196 static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
197                                struct vhost_user_msg *msg,
198                                size_t max_payload_size)
199 {
200         int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg,
201                                  max_payload_size, false);
202
203         if (rc)
204                 return rc;
205
206         if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) !=
207                         VHOST_USER_VERSION)
208                 return -EPROTO;
209
210         return 0;
211 }
212
213 static int vhost_user_send(struct virtio_uml_device *vu_dev,
214                            bool need_response, struct vhost_user_msg *msg,
215                            int *fds, size_t num_fds)
216 {
217         size_t size = sizeof(msg->header) + msg->header.size;
218         unsigned long flags;
219         bool request_ack;
220         int rc;
221
222         msg->header.flags |= VHOST_USER_VERSION;
223
224         /*
225          * The need_response flag indicates that we already need a response,
226          * e.g. to read the features. In these cases, don't request an ACK as
227          * it is meaningless. Also request an ACK only if supported.
228          */
229         request_ack = !need_response;
230         if (!(vu_dev->protocol_features &
231                         BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK)))
232                 request_ack = false;
233
234         if (request_ack)
235                 msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;
236
237         spin_lock_irqsave(&vu_dev->sock_lock, flags);
238         rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds);
239         if (rc < 0)
240                 goto out;
241
242         if (request_ack) {
243                 uint64_t status;
244
245                 rc = vhost_user_recv_u64(vu_dev, &status);
246                 if (rc)
247                         goto out;
248
249                 if (status) {
250                         vu_err(vu_dev, "slave reports error: %llu\n", status);
251                         rc = -EIO;
252                         goto out;
253                 }
254         }
255
256 out:
257         spin_unlock_irqrestore(&vu_dev->sock_lock, flags);
258         return rc;
259 }
260
261 static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
262                                       bool need_response, u32 request)
263 {
264         struct vhost_user_msg msg = {
265                 .header.request = request,
266         };
267
268         return vhost_user_send(vu_dev, need_response, &msg, NULL, 0);
269 }
270
271 static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev,
272                                          u32 request, int fd)
273 {
274         struct vhost_user_msg msg = {
275                 .header.request = request,
276         };
277
278         return vhost_user_send(vu_dev, false, &msg, &fd, 1);
279 }
280
281 static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
282                                u32 request, u64 value)
283 {
284         struct vhost_user_msg msg = {
285                 .header.request = request,
286                 .header.size = sizeof(msg.payload.integer),
287                 .payload.integer = value,
288         };
289
290         return vhost_user_send(vu_dev, false, &msg, NULL, 0);
291 }
292
293 static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
294 {
295         return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER);
296 }
297
298 static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
299                                    u64 *features)
300 {
301         int rc = vhost_user_send_no_payload(vu_dev, true,
302                                             VHOST_USER_GET_FEATURES);
303
304         if (rc)
305                 return rc;
306         return vhost_user_recv_u64(vu_dev, features);
307 }
308
309 static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
310                                    u64 features)
311 {
312         return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
313 }
314
315 static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
316                                             u64 *protocol_features)
317 {
318         int rc = vhost_user_send_no_payload(vu_dev, true,
319                         VHOST_USER_GET_PROTOCOL_FEATURES);
320
321         if (rc)
322                 return rc;
323         return vhost_user_recv_u64(vu_dev, protocol_features);
324 }
325
326 static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
327                                             u64 protocol_features)
328 {
329         return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
330                                    protocol_features);
331 }
332
333 static void vhost_user_reply(struct virtio_uml_device *vu_dev,
334                              struct vhost_user_msg *msg, int response)
335 {
336         struct vhost_user_msg reply = {
337                 .payload.integer = response,
338         };
339         size_t size = sizeof(reply.header) + sizeof(reply.payload.integer);
340         int rc;
341
342         reply.header = msg->header;
343         reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY;
344         reply.header.flags |= VHOST_USER_FLAG_REPLY;
345         reply.header.size = sizeof(reply.payload.integer);
346
347         rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0);
348
349         if (rc)
350                 vu_err(vu_dev,
351                        "sending reply to slave request failed: %d (size %zu)\n",
352                        rc, size);
353 }
354
355 static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev,
356                                        struct time_travel_event *ev)
357 {
358         struct virtqueue *vq;
359         int response = 1;
360         struct {
361                 struct vhost_user_msg msg;
362                 u8 extra_payload[512];
363         } msg;
364         int rc;
365
366         rc = vhost_user_recv_req(vu_dev, &msg.msg,
367                                  sizeof(msg.msg.payload) +
368                                  sizeof(msg.extra_payload));
369
370         if (rc)
371                 return IRQ_NONE;
372
373         switch (msg.msg.header.request) {
374         case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
375                 vu_dev->config_changed_irq = true;
376                 response = 0;
377                 break;
378         case VHOST_USER_SLAVE_VRING_CALL:
379                 virtio_device_for_each_vq((&vu_dev->vdev), vq) {
380                         if (vq->index == msg.msg.payload.vring_state.index) {
381                                 response = 0;
382                                 vu_dev->vq_irq_vq_map |= BIT_ULL(vq->index);
383                                 break;
384                         }
385                 }
386                 break;
387         case VHOST_USER_SLAVE_IOTLB_MSG:
388                 /* not supported - VIRTIO_F_ACCESS_PLATFORM */
389         case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
390                 /* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
391         default:
392                 vu_err(vu_dev, "unexpected slave request %d\n",
393                        msg.msg.header.request);
394         }
395
396         if (ev && !vu_dev->suspended)
397                 time_travel_add_irq_event(ev);
398
399         if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
400                 vhost_user_reply(vu_dev, &msg.msg, response);
401
402         return IRQ_HANDLED;
403 }
404
405 static irqreturn_t vu_req_interrupt(int irq, void *data)
406 {
407         struct virtio_uml_device *vu_dev = data;
408         irqreturn_t ret = IRQ_HANDLED;
409
410         if (!um_irq_timetravel_handler_used())
411                 ret = vu_req_read_message(vu_dev, NULL);
412
413         if (vu_dev->vq_irq_vq_map) {
414                 struct virtqueue *vq;
415
416                 virtio_device_for_each_vq((&vu_dev->vdev), vq) {
417                         if (vu_dev->vq_irq_vq_map & BIT_ULL(vq->index))
418                                 vring_interrupt(0 /* ignored */, vq);
419                 }
420                 vu_dev->vq_irq_vq_map = 0;
421         } else if (vu_dev->config_changed_irq) {
422                 virtio_config_changed(&vu_dev->vdev);
423                 vu_dev->config_changed_irq = false;
424         }
425
426         return ret;
427 }
428
/*
 * Handler passed to um_request_irq_tt(): reads the pending slave
 * request and hands @ev on so the IRQ event can be queued.
 */
static void vu_req_interrupt_comm_handler(int irq, int fd, void *data,
                                          struct time_travel_event *ev)
{
        struct virtio_uml_device *vu_dev = data;

        vu_req_read_message(vu_dev, ev);
}
434
435 static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
436 {
437         int rc, req_fds[2];
438
439         /* Use a pipe for slave req fd, SIGIO is not supported for eventfd */
440         rc = os_pipe(req_fds, true, true);
441         if (rc < 0)
442                 return rc;
443         vu_dev->req_fd = req_fds[0];
444
445         rc = um_request_irq_tt(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ,
446                                vu_req_interrupt, IRQF_SHARED,
447                                vu_dev->pdev->name, vu_dev,
448                                vu_req_interrupt_comm_handler);
449         if (rc < 0)
450                 goto err_close;
451
452         vu_dev->irq = rc;
453
454         rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
455                                            req_fds[1]);
456         if (rc)
457                 goto err_free_irq;
458
459         goto out;
460
461 err_free_irq:
462         um_free_irq(vu_dev->irq, vu_dev);
463 err_close:
464         os_close_file(req_fds[0]);
465 out:
466         /* Close unused write end of request fds */
467         os_close_file(req_fds[1]);
468         return rc;
469 }
470
471 static int vhost_user_init(struct virtio_uml_device *vu_dev)
472 {
473         int rc = vhost_user_set_owner(vu_dev);
474
475         if (rc)
476                 return rc;
477         rc = vhost_user_get_features(vu_dev, &vu_dev->features);
478         if (rc)
479                 return rc;
480
481         if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
482                 rc = vhost_user_get_protocol_features(vu_dev,
483                                 &vu_dev->protocol_features);
484                 if (rc)
485                         return rc;
486                 vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
487                 rc = vhost_user_set_protocol_features(vu_dev,
488                                 vu_dev->protocol_features);
489                 if (rc)
490                         return rc;
491         }
492
493         if (vu_dev->protocol_features &
494                         BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
495                 rc = vhost_user_init_slave_req(vu_dev);
496                 if (rc)
497                         return rc;
498         }
499
500         return 0;
501 }
502
503 static void vhost_user_get_config(struct virtio_uml_device *vu_dev,
504                                   u32 offset, void *buf, u32 len)
505 {
506         u32 cfg_size = offset + len;
507         struct vhost_user_msg *msg;
508         size_t payload_size = sizeof(msg->payload.config) + cfg_size;
509         size_t msg_size = sizeof(msg->header) + payload_size;
510         int rc;
511
512         if (!(vu_dev->protocol_features &
513               BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
514                 return;
515
516         msg = kzalloc(msg_size, GFP_KERNEL);
517         if (!msg)
518                 return;
519         msg->header.request = VHOST_USER_GET_CONFIG;
520         msg->header.size = payload_size;
521         msg->payload.config.offset = 0;
522         msg->payload.config.size = cfg_size;
523
524         rc = vhost_user_send(vu_dev, true, msg, NULL, 0);
525         if (rc) {
526                 vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n",
527                        rc);
528                 goto free;
529         }
530
531         rc = vhost_user_recv_resp(vu_dev, msg, msg_size);
532         if (rc) {
533                 vu_err(vu_dev,
534                        "receiving VHOST_USER_GET_CONFIG response failed: %d\n",
535                        rc);
536                 goto free;
537         }
538
539         if (msg->header.size != payload_size ||
540             msg->payload.config.size != cfg_size) {
541                 rc = -EPROTO;
542                 vu_err(vu_dev,
543                        "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n",
544                        msg->header.size, payload_size,
545                        msg->payload.config.size, cfg_size);
546                 goto free;
547         }
548         memcpy(buf, msg->payload.config.payload + offset, len);
549
550 free:
551         kfree(msg);
552 }
553
554 static void vhost_user_set_config(struct virtio_uml_device *vu_dev,
555                                   u32 offset, const void *buf, u32 len)
556 {
557         struct vhost_user_msg *msg;
558         size_t payload_size = sizeof(msg->payload.config) + len;
559         size_t msg_size = sizeof(msg->header) + payload_size;
560         int rc;
561
562         if (!(vu_dev->protocol_features &
563               BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
564                 return;
565
566         msg = kzalloc(msg_size, GFP_KERNEL);
567         if (!msg)
568                 return;
569         msg->header.request = VHOST_USER_SET_CONFIG;
570         msg->header.size = payload_size;
571         msg->payload.config.offset = offset;
572         msg->payload.config.size = len;
573         memcpy(msg->payload.config.payload, buf, len);
574
575         rc = vhost_user_send(vu_dev, false, msg, NULL, 0);
576         if (rc)
577                 vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n",
578                        rc);
579
580         kfree(msg);
581 }
582
583 static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
584                                       struct vhost_user_mem_region *region_out)
585 {
586         unsigned long long mem_offset;
587         int rc = phys_mapping(addr, &mem_offset);
588
589         if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc))
590                 return -EFAULT;
591         *fd_out = rc;
592         region_out->guest_addr = addr;
593         region_out->user_addr = addr;
594         region_out->size = size;
595         region_out->mmap_offset = mem_offset;
596
597         /* Ensure mapping is valid for the entire region */
598         rc = phys_mapping(addr + size - 1, &mem_offset);
599         if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n",
600                  addr + size - 1, rc, *fd_out))
601                 return -EFAULT;
602         return 0;
603 }
604
605 static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
606 {
607         struct vhost_user_msg msg = {
608                 .header.request = VHOST_USER_SET_MEM_TABLE,
609                 .header.size = sizeof(msg.payload.mem_regions),
610                 .payload.mem_regions.num = 1,
611         };
612         unsigned long reserved = uml_reserved - uml_physmem;
613         int fds[2];
614         int rc;
615
616         /*
617          * This is a bit tricky, see also the comment with setup_physmem().
618          *
619          * Essentially, setup_physmem() uses a file to mmap() our physmem,
620          * but the code and data we *already* have is omitted. To us, this
621          * is no difference, since they both become part of our address
622          * space and memory consumption. To somebody looking in from the
623          * outside, however, it is different because the part of our memory
624          * consumption that's already part of the binary (code/data) is not
625          * mapped from the file, so it's not visible to another mmap from
626          * the file descriptor.
627          *
628          * Thus, don't advertise this space to the vhost-user slave. This
629          * means that the slave will likely abort or similar when we give
630          * it an address from the hidden range, since it's not marked as
631          * a valid address, but at least that way we detect the issue and
632          * don't just have the slave read an all-zeroes buffer from the
633          * shared memory file, or write something there that we can never
634          * see (depending on the direction of the virtqueue traffic.)
635          *
636          * Since we usually don't want to use .text for virtio buffers,
637          * this effectively means that you cannot use
638          *  1) global variables, which are in the .bss and not in the shm
639          *     file-backed memory
640          *  2) the stack in some processes, depending on where they have
641          *     their stack (or maybe only no interrupt stack?)
642          *
643          * The stack is already not typically valid for DMA, so this isn't
644          * much of a restriction, but global variables might be encountered.
645          *
646          * It might be possible to fix it by copying around the data that's
647          * between bss_start and where we map the file now, but it's not
648          * something that you typically encounter with virtio drivers, so
649          * it didn't seem worthwhile.
650          */
651         rc = vhost_user_init_mem_region(reserved, physmem_size - reserved,
652                                         &fds[0],
653                                         &msg.payload.mem_regions.regions[0]);
654
655         if (rc < 0)
656                 return rc;
657         if (highmem) {
658                 msg.payload.mem_regions.num++;
659                 rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
660                                 &fds[1], &msg.payload.mem_regions.regions[1]);
661                 if (rc < 0)
662                         return rc;
663         }
664
665         return vhost_user_send(vu_dev, false, &msg, fds,
666                                msg.payload.mem_regions.num);
667 }
668
669 static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
670                                       u32 request, u32 index, u32 num)
671 {
672         struct vhost_user_msg msg = {
673                 .header.request = request,
674                 .header.size = sizeof(msg.payload.vring_state),
675                 .payload.vring_state.index = index,
676                 .payload.vring_state.num = num,
677         };
678
679         return vhost_user_send(vu_dev, false, &msg, NULL, 0);
680 }
681
682 static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
683                                     u32 index, u32 num)
684 {
685         return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
686                                           index, num);
687 }
688
689 static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
690                                      u32 index, u32 offset)
691 {
692         return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
693                                           index, offset);
694 }
695
696 static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
697                                      u32 index, u64 desc, u64 used, u64 avail,
698                                      u64 log)
699 {
700         struct vhost_user_msg msg = {
701                 .header.request = VHOST_USER_SET_VRING_ADDR,
702                 .header.size = sizeof(msg.payload.vring_addr),
703                 .payload.vring_addr.index = index,
704                 .payload.vring_addr.desc = desc,
705                 .payload.vring_addr.used = used,
706                 .payload.vring_addr.avail = avail,
707                 .payload.vring_addr.log = log,
708         };
709
710         return vhost_user_send(vu_dev, false, &msg, NULL, 0);
711 }
712
713 static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
714                                    u32 request, int index, int fd)
715 {
716         struct vhost_user_msg msg = {
717                 .header.request = request,
718                 .header.size = sizeof(msg.payload.integer),
719                 .payload.integer = index,
720         };
721
722         if (index & ~VHOST_USER_VRING_INDEX_MASK)
723                 return -EINVAL;
724         if (fd < 0) {
725                 msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
726                 return vhost_user_send(vu_dev, false, &msg, NULL, 0);
727         }
728         return vhost_user_send(vu_dev, false, &msg, &fd, 1);
729 }
730
731 static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
732                                      int index, int fd)
733 {
734         return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
735                                        index, fd);
736 }
737
738 static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
739                                      int index, int fd)
740 {
741         return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
742                                        index, fd);
743 }
744
745 static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
746                                        u32 index, bool enable)
747 {
748         if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
749                 return 0;
750
751         return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
752                                           index, enable);
753 }
754
755
756 /* Virtio interface */
757
758 static bool vu_notify(struct virtqueue *vq)
759 {
760         struct virtio_uml_vq_info *info = vq->priv;
761         const uint64_t n = 1;
762         int rc;
763
764         if (info->suspended)
765                 return true;
766
767         time_travel_propagate_time();
768
769         if (info->kick_fd < 0) {
770                 struct virtio_uml_device *vu_dev;
771
772                 vu_dev = to_virtio_uml_device(vq->vdev);
773
774                 return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK,
775                                                   vq->index, 0) == 0;
776         }
777
778         do {
779                 rc = os_write_file(info->kick_fd, &n, sizeof(n));
780         } while (rc == -EINTR);
781         return !WARN(rc != sizeof(n), "write returned %d\n", rc);
782 }
783
784 static irqreturn_t vu_interrupt(int irq, void *opaque)
785 {
786         struct virtqueue *vq = opaque;
787         struct virtio_uml_vq_info *info = vq->priv;
788         uint64_t n;
789         int rc;
790         irqreturn_t ret = IRQ_NONE;
791
792         do {
793                 rc = os_read_file(info->call_fd, &n, sizeof(n));
794                 if (rc == sizeof(n))
795                         ret |= vring_interrupt(irq, vq);
796         } while (rc == sizeof(n) || rc == -EINTR);
797         WARN(rc != -EAGAIN, "read returned %d\n", rc);
798         return ret;
799 }
800
801
/* Config space read for @vdev, forwarded to vhost_user_get_config(). */
static void vu_get(struct virtio_device *vdev, unsigned offset,
                   void *buf, unsigned len)
{
        vhost_user_get_config(to_virtio_uml_device(vdev), offset, buf, len);
}
809
/* Config space write for @vdev, forwarded to vhost_user_set_config(). */
static void vu_set(struct virtio_device *vdev, unsigned offset,
                   const void *buf, unsigned len)
{
        vhost_user_set_config(to_virtio_uml_device(vdev), offset, buf, len);
}
817
818 static u8 vu_get_status(struct virtio_device *vdev)
819 {
820         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
821
822         return vu_dev->status;
823 }
824
825 static void vu_set_status(struct virtio_device *vdev, u8 status)
826 {
827         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
828
829         vu_dev->status = status;
830 }
831
832 static void vu_reset(struct virtio_device *vdev)
833 {
834         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
835
836         vu_dev->status = 0;
837 }
838
839 static void vu_del_vq(struct virtqueue *vq)
840 {
841         struct virtio_uml_vq_info *info = vq->priv;
842
843         if (info->call_fd >= 0) {
844                 struct virtio_uml_device *vu_dev;
845
846                 vu_dev = to_virtio_uml_device(vq->vdev);
847
848                 um_free_irq(vu_dev->irq, vq);
849                 os_close_file(info->call_fd);
850         }
851
852         if (info->kick_fd >= 0)
853                 os_close_file(info->kick_fd);
854
855         vring_del_virtqueue(vq);
856         kfree(info);
857 }
858
859 static void vu_del_vqs(struct virtio_device *vdev)
860 {
861         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
862         struct virtqueue *vq, *n;
863         u64 features;
864
865         /* Note: reverse order as a workaround to a decoding bug in snabb */
866         list_for_each_entry_reverse(vq, &vdev->vqs, list)
867                 WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));
868
869         /* Ensure previous messages have been processed */
870         WARN_ON(vhost_user_get_features(vu_dev, &features));
871
872         list_for_each_entry_safe(vq, n, &vdev->vqs, list)
873                 vu_del_vq(vq);
874 }
875
876 static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
877                                struct virtqueue *vq)
878 {
879         struct virtio_uml_vq_info *info = vq->priv;
880         int call_fds[2];
881         int rc;
882
883         /* no call FD needed/desired in this case */
884         if (vu_dev->protocol_features &
885                         BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
886             vu_dev->protocol_features &
887                         BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
888                 info->call_fd = -1;
889                 return 0;
890         }
891
892         /* Use a pipe for call fd, since SIGIO is not supported for eventfd */
893         rc = os_pipe(call_fds, true, true);
894         if (rc < 0)
895                 return rc;
896
897         info->call_fd = call_fds[0];
898         rc = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ,
899                             vu_interrupt, IRQF_SHARED, info->name, vq);
900         if (rc < 0)
901                 goto close_both;
902
903         rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
904         if (rc)
905                 goto release_irq;
906
907         goto out;
908
909 release_irq:
910         um_free_irq(vu_dev->irq, vq);
911 close_both:
912         os_close_file(call_fds[0]);
913 out:
914         /* Close (unused) write end of call fds */
915         os_close_file(call_fds[1]);
916
917         return rc;
918 }
919
920 static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
921                                      unsigned index, vq_callback_t *callback,
922                                      const char *name, bool ctx)
923 {
924         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
925         struct platform_device *pdev = vu_dev->pdev;
926         struct virtio_uml_vq_info *info;
927         struct virtqueue *vq;
928         int num = MAX_SUPPORTED_QUEUE_SIZE;
929         int rc;
930
931         info = kzalloc(sizeof(*info), GFP_KERNEL);
932         if (!info) {
933                 rc = -ENOMEM;
934                 goto error_kzalloc;
935         }
936         snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
937                  pdev->id, name);
938
939         vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
940                                     ctx, vu_notify, callback, info->name);
941         if (!vq) {
942                 rc = -ENOMEM;
943                 goto error_create;
944         }
945         vq->priv = info;
946         num = virtqueue_get_vring_size(vq);
947
948         if (vu_dev->protocol_features &
949                         BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) {
950                 info->kick_fd = -1;
951         } else {
952                 rc = os_eventfd(0, 0);
953                 if (rc < 0)
954                         goto error_kick;
955                 info->kick_fd = rc;
956         }
957
958         rc = vu_setup_vq_call_fd(vu_dev, vq);
959         if (rc)
960                 goto error_call;
961
962         rc = vhost_user_set_vring_num(vu_dev, index, num);
963         if (rc)
964                 goto error_setup;
965
966         rc = vhost_user_set_vring_base(vu_dev, index, 0);
967         if (rc)
968                 goto error_setup;
969
970         rc = vhost_user_set_vring_addr(vu_dev, index,
971                                        virtqueue_get_desc_addr(vq),
972                                        virtqueue_get_used_addr(vq),
973                                        virtqueue_get_avail_addr(vq),
974                                        (u64) -1);
975         if (rc)
976                 goto error_setup;
977
978         return vq;
979
980 error_setup:
981         if (info->call_fd >= 0) {
982                 um_free_irq(vu_dev->irq, vq);
983                 os_close_file(info->call_fd);
984         }
985 error_call:
986         if (info->kick_fd >= 0)
987                 os_close_file(info->kick_fd);
988 error_kick:
989         vring_del_virtqueue(vq);
990 error_create:
991         kfree(info);
992 error_kzalloc:
993         return ERR_PTR(rc);
994 }
995
996 static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
997                        struct virtqueue *vqs[], vq_callback_t *callbacks[],
998                        const char * const names[], const bool *ctx,
999                        struct irq_affinity *desc)
1000 {
1001         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1002         int i, queue_idx = 0, rc;
1003         struct virtqueue *vq;
1004
1005         /* not supported for now */
1006         if (WARN_ON(nvqs > 64))
1007                 return -EINVAL;
1008
1009         rc = vhost_user_set_mem_table(vu_dev);
1010         if (rc)
1011                 return rc;
1012
1013         for (i = 0; i < nvqs; ++i) {
1014                 if (!names[i]) {
1015                         vqs[i] = NULL;
1016                         continue;
1017                 }
1018
1019                 vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
1020                                      ctx ? ctx[i] : false);
1021                 if (IS_ERR(vqs[i])) {
1022                         rc = PTR_ERR(vqs[i]);
1023                         goto error_setup;
1024                 }
1025         }
1026
1027         list_for_each_entry(vq, &vdev->vqs, list) {
1028                 struct virtio_uml_vq_info *info = vq->priv;
1029
1030                 if (info->kick_fd >= 0) {
1031                         rc = vhost_user_set_vring_kick(vu_dev, vq->index,
1032                                                        info->kick_fd);
1033                         if (rc)
1034                                 goto error_setup;
1035                 }
1036
1037                 rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
1038                 if (rc)
1039                         goto error_setup;
1040         }
1041
1042         return 0;
1043
1044 error_setup:
1045         vu_del_vqs(vdev);
1046         return rc;
1047 }
1048
1049 static u64 vu_get_features(struct virtio_device *vdev)
1050 {
1051         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1052
1053         return vu_dev->features;
1054 }
1055
1056 static int vu_finalize_features(struct virtio_device *vdev)
1057 {
1058         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1059         u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;
1060
1061         vring_transport_features(vdev);
1062         vu_dev->features = vdev->features | supported;
1063
1064         return vhost_user_set_features(vu_dev, vu_dev->features);
1065 }
1066
1067 static const char *vu_bus_name(struct virtio_device *vdev)
1068 {
1069         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1070
1071         return vu_dev->pdev->name;
1072 }
1073
1074 static const struct virtio_config_ops virtio_uml_config_ops = {
1075         .get = vu_get,
1076         .set = vu_set,
1077         .get_status = vu_get_status,
1078         .set_status = vu_set_status,
1079         .reset = vu_reset,
1080         .find_vqs = vu_find_vqs,
1081         .del_vqs = vu_del_vqs,
1082         .get_features = vu_get_features,
1083         .finalize_features = vu_finalize_features,
1084         .bus_name = vu_bus_name,
1085 };
1086
1087 static void virtio_uml_release_dev(struct device *d)
1088 {
1089         struct virtio_device *vdev =
1090                         container_of(d, struct virtio_device, dev);
1091         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1092
1093         /* might not have been opened due to not negotiating the feature */
1094         if (vu_dev->req_fd >= 0) {
1095                 um_free_irq(vu_dev->irq, vu_dev);
1096                 os_close_file(vu_dev->req_fd);
1097         }
1098
1099         os_close_file(vu_dev->sock);
1100         kfree(vu_dev);
1101 }
1102
1103 void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev,
1104                                   bool no_vq_suspend)
1105 {
1106         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1107
1108         if (WARN_ON(vdev->config != &virtio_uml_config_ops))
1109                 return;
1110
1111         vu_dev->no_vq_suspend = no_vq_suspend;
1112         dev_info(&vdev->dev, "%sabled VQ suspend\n",
1113                  no_vq_suspend ? "dis" : "en");
1114 }
1115
1116 /* Platform device */
1117
1118 static int virtio_uml_probe(struct platform_device *pdev)
1119 {
1120         struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1121         struct virtio_uml_device *vu_dev;
1122         int rc;
1123
1124         if (!pdata)
1125                 return -EINVAL;
1126
1127         vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL);
1128         if (!vu_dev)
1129                 return -ENOMEM;
1130
1131         vu_dev->vdev.dev.parent = &pdev->dev;
1132         vu_dev->vdev.dev.release = virtio_uml_release_dev;
1133         vu_dev->vdev.config = &virtio_uml_config_ops;
1134         vu_dev->vdev.id.device = pdata->virtio_device_id;
1135         vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
1136         vu_dev->pdev = pdev;
1137         vu_dev->req_fd = -1;
1138
1139         do {
1140                 rc = os_connect_socket(pdata->socket_path);
1141         } while (rc == -EINTR);
1142         if (rc < 0)
1143                 goto error_free;
1144         vu_dev->sock = rc;
1145
1146         spin_lock_init(&vu_dev->sock_lock);
1147
1148         rc = vhost_user_init(vu_dev);
1149         if (rc)
1150                 goto error_init;
1151
1152         platform_set_drvdata(pdev, vu_dev);
1153
1154         device_set_wakeup_capable(&vu_dev->vdev.dev, true);
1155
1156         rc = register_virtio_device(&vu_dev->vdev);
1157         if (rc)
1158                 put_device(&vu_dev->vdev.dev);
1159         vu_dev->registered = 1;
1160         return rc;
1161
1162 error_init:
1163         os_close_file(vu_dev->sock);
1164 error_free:
1165         kfree(vu_dev);
1166         return rc;
1167 }
1168
1169 static int virtio_uml_remove(struct platform_device *pdev)
1170 {
1171         struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
1172
1173         unregister_virtio_device(&vu_dev->vdev);
1174         return 0;
1175 }
1176
1177 /* Command line device list */
1178
/*
 * Release callback for the command-line parent device.  Intentionally
 * empty: the parent is a static object, so there is nothing to free.
 */
static void vu_cmdline_release_dev(struct device *d)
{
	/* nothing to do */
}
1182
1183 static struct device vu_cmdline_parent = {
1184         .init_name = "virtio-uml-cmdline",
1185         .release = vu_cmdline_release_dev,
1186 };
1187
1188 static bool vu_cmdline_parent_registered;
1189 static int vu_cmdline_id;
1190
1191 static int vu_unregister_cmdline_device(struct device *dev, void *data)
1192 {
1193         struct platform_device *pdev = to_platform_device(dev);
1194         struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1195
1196         kfree(pdata->socket_path);
1197         platform_device_unregister(pdev);
1198         return 0;
1199 }
1200
1201 static void vu_conn_broken(struct work_struct *wk)
1202 {
1203         struct virtio_uml_platform_data *pdata;
1204
1205         pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk);
1206         vu_unregister_cmdline_device(&pdata->pdev->dev, NULL);
1207 }
1208
1209 static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
1210 {
1211         const char *ids = strchr(device, ':');
1212         unsigned int virtio_device_id;
1213         int processed, consumed, err;
1214         char *socket_path;
1215         struct virtio_uml_platform_data pdata, *ppdata;
1216         struct platform_device *pdev;
1217
1218         if (!ids || ids == device)
1219                 return -EINVAL;
1220
1221         processed = sscanf(ids, ":%u%n:%d%n",
1222                            &virtio_device_id, &consumed,
1223                            &vu_cmdline_id, &consumed);
1224
1225         if (processed < 1 || ids[consumed])
1226                 return -EINVAL;
1227
1228         if (!vu_cmdline_parent_registered) {
1229                 err = device_register(&vu_cmdline_parent);
1230                 if (err) {
1231                         pr_err("Failed to register parent device!\n");
1232                         put_device(&vu_cmdline_parent);
1233                         return err;
1234                 }
1235                 vu_cmdline_parent_registered = true;
1236         }
1237
1238         socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
1239         if (!socket_path)
1240                 return -ENOMEM;
1241
1242         pdata.virtio_device_id = (u32) virtio_device_id;
1243         pdata.socket_path = socket_path;
1244
1245         pr_info("Registering device virtio-uml.%d id=%d at %s\n",
1246                 vu_cmdline_id, virtio_device_id, socket_path);
1247
1248         pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
1249                                              vu_cmdline_id++, &pdata,
1250                                              sizeof(pdata));
1251         err = PTR_ERR_OR_ZERO(pdev);
1252         if (err)
1253                 goto free;
1254
1255         ppdata = pdev->dev.platform_data;
1256         ppdata->pdev = pdev;
1257         INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken);
1258
1259         return 0;
1260
1261 free:
1262         kfree(socket_path);
1263         return err;
1264 }
1265
1266 static int vu_cmdline_get_device(struct device *dev, void *data)
1267 {
1268         struct platform_device *pdev = to_platform_device(dev);
1269         struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1270         char *buffer = data;
1271         unsigned int len = strlen(buffer);
1272
1273         snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
1274                  pdata->socket_path, pdata->virtio_device_id, pdev->id);
1275         return 0;
1276 }
1277
1278 static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
1279 {
1280         buffer[0] = '\0';
1281         if (vu_cmdline_parent_registered)
1282                 device_for_each_child(&vu_cmdline_parent, buffer,
1283                                       vu_cmdline_get_device);
1284         return strlen(buffer) + 1;
1285 }
1286
1287 static const struct kernel_param_ops vu_cmdline_param_ops = {
1288         .set = vu_cmdline_set,
1289         .get = vu_cmdline_get,
1290 };
1291
1292 device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
1293 __uml_help(vu_cmdline_param_ops,
1294 "virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n"
1295 "    Configure a virtio device over a vhost-user socket.\n"
1296 "    See virtio_ids.h for a list of possible virtio device id values.\n"
1297 "    Optionally use a specific platform_device id.\n\n"
1298 );
1299
1300
1301 static void vu_unregister_cmdline_devices(void)
1302 {
1303         if (vu_cmdline_parent_registered) {
1304                 device_for_each_child(&vu_cmdline_parent, NULL,
1305                                       vu_unregister_cmdline_device);
1306                 device_unregister(&vu_cmdline_parent);
1307                 vu_cmdline_parent_registered = false;
1308         }
1309 }
1310
1311 /* Platform driver */
1312
1313 static const struct of_device_id virtio_uml_match[] = {
1314         { .compatible = "virtio,uml", },
1315         { }
1316 };
1317 MODULE_DEVICE_TABLE(of, virtio_uml_match);
1318
1319 static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state)
1320 {
1321         struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
1322
1323         if (!vu_dev->no_vq_suspend) {
1324                 struct virtqueue *vq;
1325
1326                 virtio_device_for_each_vq((&vu_dev->vdev), vq) {
1327                         struct virtio_uml_vq_info *info = vq->priv;
1328
1329                         info->suspended = true;
1330                         vhost_user_set_vring_enable(vu_dev, vq->index, false);
1331                 }
1332         }
1333
1334         if (!device_may_wakeup(&vu_dev->vdev.dev)) {
1335                 vu_dev->suspended = true;
1336                 return 0;
1337         }
1338
1339         return irq_set_irq_wake(vu_dev->irq, 1);
1340 }
1341
1342 static int virtio_uml_resume(struct platform_device *pdev)
1343 {
1344         struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
1345
1346         if (!vu_dev->no_vq_suspend) {
1347                 struct virtqueue *vq;
1348
1349                 virtio_device_for_each_vq((&vu_dev->vdev), vq) {
1350                         struct virtio_uml_vq_info *info = vq->priv;
1351
1352                         info->suspended = false;
1353                         vhost_user_set_vring_enable(vu_dev, vq->index, true);
1354                 }
1355         }
1356
1357         vu_dev->suspended = false;
1358
1359         if (!device_may_wakeup(&vu_dev->vdev.dev))
1360                 return 0;
1361
1362         return irq_set_irq_wake(vu_dev->irq, 0);
1363 }
1364
1365 static struct platform_driver virtio_uml_driver = {
1366         .probe = virtio_uml_probe,
1367         .remove = virtio_uml_remove,
1368         .driver = {
1369                 .name = "virtio-uml",
1370                 .of_match_table = virtio_uml_match,
1371         },
1372         .suspend = virtio_uml_suspend,
1373         .resume = virtio_uml_resume,
1374 };
1375
1376 static int __init virtio_uml_init(void)
1377 {
1378         return platform_driver_register(&virtio_uml_driver);
1379 }
1380
1381 static void __exit virtio_uml_exit(void)
1382 {
1383         platform_driver_unregister(&virtio_uml_driver);
1384         vu_unregister_cmdline_devices();
1385 }
1386
1387 module_init(virtio_uml_init);
1388 module_exit(virtio_uml_exit);
1389 __uml_exitcall(virtio_uml_exit);
1390
1391 MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
1392 MODULE_LICENSE("GPL");