Merge branches 'acpi-scan' and 'acpi-prm'
[linux-2.6-microblaze.git] / arch / um / drivers / virtio_uml.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Virtio vhost-user driver
4  *
5  * Copyright(c) 2019 Intel Corporation
6  *
7  * This driver allows virtio devices to be used over a vhost-user socket.
8  *
9  * Guest devices can be instantiated by kernel module or command line
10  * parameters. One device will be created for each parameter. Syntax:
11  *
12  *              virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]
13  * where:
14  *              <socket>        := vhost-user socket path to connect
15  *              <virtio_id>     := virtio device id (as in virtio_ids.h)
16  *              <platform_id>   := (optional) platform device id
17  *
18  * example:
19  *              virtio_uml.device=/var/uml.socket:1
20  *
21  * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
22  */
23 #include <linux/module.h>
24 #include <linux/platform_device.h>
25 #include <linux/slab.h>
26 #include <linux/virtio.h>
27 #include <linux/virtio_config.h>
28 #include <linux/virtio_ring.h>
29 #include <linux/time-internal.h>
30 #include <shared/as-layout.h>
31 #include <irq_kern.h>
32 #include <init.h>
33 #include <os.h>
34 #include "vhost_user.h"
35
/* Queue-size limit for this driver (its users lie outside this excerpt). */
#define MAX_SUPPORTED_QUEUE_SIZE        256

/* Recover the enclosing virtio_uml_device from its embedded 'vdev' member. */
#define to_virtio_uml_device(_vd) \
        container_of(_vd, struct virtio_uml_device, vdev)
40
41 struct virtio_uml_platform_data {
42         u32 virtio_device_id;
43         const char *socket_path;
44         struct work_struct conn_broken_wk;
45         struct platform_device *pdev;
46 };
47
48 struct virtio_uml_device {
49         struct virtio_device vdev;
50         struct platform_device *pdev;
51
52         spinlock_t sock_lock;
53         int sock, req_fd, irq;
54         u64 features;
55         u64 protocol_features;
56         u8 status;
57         u8 registered:1;
58         u8 suspended:1;
59         u8 no_vq_suspend:1;
60
61         u8 config_changed_irq:1;
62         uint64_t vq_irq_vq_map;
63 };
64
/* Per-virtqueue private data, stored in vq->priv. */
struct virtio_uml_vq_info {
        /* fd written to kick the device; negative means in-band kicks */
        int kick_fd;
        /* fd read for device calls; negative means no call fd is in use */
        int call_fd;
        /* name passed when requesting the call-fd IRQ */
        char name[32];
        /* while set, vu_notify() reports success without kicking */
        bool suspended;
};
70
/* Memory sizes defined elsewhere; used when building the memory table. */
extern unsigned long long physmem_size;
extern unsigned long long highmem;

/* dev_err() against the platform device behind a virtio_uml_device. */
#define vu_err(_vu_dev, ...)    dev_err(&(_vu_dev)->pdev->dev, ##__VA_ARGS__)
74
75 /* Vhost-user protocol */
76
/*
 * Send all of @buf on @fd, passing @fds along with it.
 *
 * Short writes are continued and -EINTR is retried. The descriptors are
 * only attached until the first chunk has gone out; later chunks carry
 * data only.
 *
 * Returns 0 once everything was sent, or the negative error from
 * os_sendmsg_fds().
 */
static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
                            const int *fds, unsigned int fds_num)
{
        int sent;

        for (;;) {
                sent = os_sendmsg_fds(fd, buf, len, fds, fds_num);
                if (sent > 0) {
                        buf += sent;
                        len -= sent;
                        /* never pass the descriptors a second time */
                        fds = NULL;
                        fds_num = 0;
                }

                if (!len)
                        break;
                if (sent < 0 && sent != -EINTR)
                        break;
        }

        return sent < 0 ? sent : 0;
}
96
/*
 * Read exactly @len bytes from @fd into @buf.
 *
 * -EINTR is always retried. -EAGAIN is retried only when @abortable is
 * false; otherwise it is handed back to the caller.
 *
 * Returns 0 when @len bytes were read (or @len is 0), -ECONNRESET when
 * os_read_file() returns 0, or another negative error from os_read_file().
 */
static int full_read(int fd, void *buf, int len, bool abortable)
{
        int nread;

        if (len == 0)
                return 0;

        for (;;) {
                nread = os_read_file(fd, buf, len);

                /* a zero-length read is reported as a reset connection */
                if (nread == 0)
                        return -ECONNRESET;

                if (nread < 0) {
                        if (nread == -EINTR)
                                continue;
                        if (nread == -EAGAIN && !abortable)
                                continue;
                        return nread;
                }

                buf += nread;
                len -= nread;
                if (!len)
                        return 0;
        }
}
118
119 static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
120 {
121         return full_read(fd, msg, sizeof(msg->header), true);
122 }
123
124 static int vhost_user_recv(struct virtio_uml_device *vu_dev,
125                            int fd, struct vhost_user_msg *msg,
126                            size_t max_payload_size, bool wait)
127 {
128         size_t size;
129         int rc;
130
131         /*
132          * In virtio time-travel mode, we're handling all the vhost-user
133          * FDs by polling them whenever appropriate. However, we may get
134          * into a situation where we're sending out an interrupt message
135          * to a device (e.g. a net device) and need to handle a simulation
136          * time message while doing so, e.g. one that tells us to update
137          * our idea of how long we can run without scheduling.
138          *
139          * Thus, we need to not just read() from the given fd, but need
140          * to also handle messages for the simulation time - this function
141          * does that for us while waiting for the given fd to be readable.
142          */
143         if (wait)
144                 time_travel_wait_readable(fd);
145
146         rc = vhost_user_recv_header(fd, msg);
147
148         if (rc == -ECONNRESET && vu_dev->registered) {
149                 struct virtio_uml_platform_data *pdata;
150
151                 pdata = vu_dev->pdev->dev.platform_data;
152
153                 virtio_break_device(&vu_dev->vdev);
154                 schedule_work(&pdata->conn_broken_wk);
155         }
156         if (rc)
157                 return rc;
158         size = msg->header.size;
159         if (size > max_payload_size)
160                 return -EPROTO;
161         return full_read(fd, &msg->payload, size, false);
162 }
163
164 static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
165                                 struct vhost_user_msg *msg,
166                                 size_t max_payload_size)
167 {
168         int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg,
169                                  max_payload_size, true);
170
171         if (rc)
172                 return rc;
173
174         if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
175                 return -EPROTO;
176
177         return 0;
178 }
179
180 static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
181                                u64 *value)
182 {
183         struct vhost_user_msg msg;
184         int rc = vhost_user_recv_resp(vu_dev, &msg,
185                                       sizeof(msg.payload.integer));
186
187         if (rc)
188                 return rc;
189         if (msg.header.size != sizeof(msg.payload.integer))
190                 return -EPROTO;
191         *value = msg.payload.integer;
192         return 0;
193 }
194
195 static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
196                                struct vhost_user_msg *msg,
197                                size_t max_payload_size)
198 {
199         int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg,
200                                  max_payload_size, false);
201
202         if (rc)
203                 return rc;
204
205         if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) !=
206                         VHOST_USER_VERSION)
207                 return -EPROTO;
208
209         return 0;
210 }
211
212 static int vhost_user_send(struct virtio_uml_device *vu_dev,
213                            bool need_response, struct vhost_user_msg *msg,
214                            int *fds, size_t num_fds)
215 {
216         size_t size = sizeof(msg->header) + msg->header.size;
217         unsigned long flags;
218         bool request_ack;
219         int rc;
220
221         msg->header.flags |= VHOST_USER_VERSION;
222
223         /*
224          * The need_response flag indicates that we already need a response,
225          * e.g. to read the features. In these cases, don't request an ACK as
226          * it is meaningless. Also request an ACK only if supported.
227          */
228         request_ack = !need_response;
229         if (!(vu_dev->protocol_features &
230                         BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK)))
231                 request_ack = false;
232
233         if (request_ack)
234                 msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;
235
236         spin_lock_irqsave(&vu_dev->sock_lock, flags);
237         rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds);
238         if (rc < 0)
239                 goto out;
240
241         if (request_ack) {
242                 uint64_t status;
243
244                 rc = vhost_user_recv_u64(vu_dev, &status);
245                 if (rc)
246                         goto out;
247
248                 if (status) {
249                         vu_err(vu_dev, "slave reports error: %llu\n", status);
250                         rc = -EIO;
251                         goto out;
252                 }
253         }
254
255 out:
256         spin_unlock_irqrestore(&vu_dev->sock_lock, flags);
257         return rc;
258 }
259
260 static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
261                                       bool need_response, u32 request)
262 {
263         struct vhost_user_msg msg = {
264                 .header.request = request,
265         };
266
267         return vhost_user_send(vu_dev, need_response, &msg, NULL, 0);
268 }
269
270 static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev,
271                                          u32 request, int fd)
272 {
273         struct vhost_user_msg msg = {
274                 .header.request = request,
275         };
276
277         return vhost_user_send(vu_dev, false, &msg, &fd, 1);
278 }
279
280 static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
281                                u32 request, u64 value)
282 {
283         struct vhost_user_msg msg = {
284                 .header.request = request,
285                 .header.size = sizeof(msg.payload.integer),
286                 .payload.integer = value,
287         };
288
289         return vhost_user_send(vu_dev, false, &msg, NULL, 0);
290 }
291
292 static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
293 {
294         return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER);
295 }
296
297 static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
298                                    u64 *features)
299 {
300         int rc = vhost_user_send_no_payload(vu_dev, true,
301                                             VHOST_USER_GET_FEATURES);
302
303         if (rc)
304                 return rc;
305         return vhost_user_recv_u64(vu_dev, features);
306 }
307
308 static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
309                                    u64 features)
310 {
311         return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
312 }
313
314 static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
315                                             u64 *protocol_features)
316 {
317         int rc = vhost_user_send_no_payload(vu_dev, true,
318                         VHOST_USER_GET_PROTOCOL_FEATURES);
319
320         if (rc)
321                 return rc;
322         return vhost_user_recv_u64(vu_dev, protocol_features);
323 }
324
325 static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
326                                             u64 protocol_features)
327 {
328         return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
329                                    protocol_features);
330 }
331
332 static void vhost_user_reply(struct virtio_uml_device *vu_dev,
333                              struct vhost_user_msg *msg, int response)
334 {
335         struct vhost_user_msg reply = {
336                 .payload.integer = response,
337         };
338         size_t size = sizeof(reply.header) + sizeof(reply.payload.integer);
339         int rc;
340
341         reply.header = msg->header;
342         reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY;
343         reply.header.flags |= VHOST_USER_FLAG_REPLY;
344         reply.header.size = sizeof(reply.payload.integer);
345
346         rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0);
347
348         if (rc)
349                 vu_err(vu_dev,
350                        "sending reply to slave request failed: %d (size %zu)\n",
351                        rc, size);
352 }
353
354 static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev,
355                                        struct time_travel_event *ev)
356 {
357         struct virtqueue *vq;
358         int response = 1;
359         struct {
360                 struct vhost_user_msg msg;
361                 u8 extra_payload[512];
362         } msg;
363         int rc;
364
365         rc = vhost_user_recv_req(vu_dev, &msg.msg,
366                                  sizeof(msg.msg.payload) +
367                                  sizeof(msg.extra_payload));
368
369         if (rc)
370                 return IRQ_NONE;
371
372         switch (msg.msg.header.request) {
373         case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
374                 vu_dev->config_changed_irq = true;
375                 response = 0;
376                 break;
377         case VHOST_USER_SLAVE_VRING_CALL:
378                 virtio_device_for_each_vq((&vu_dev->vdev), vq) {
379                         if (vq->index == msg.msg.payload.vring_state.index) {
380                                 response = 0;
381                                 vu_dev->vq_irq_vq_map |= BIT_ULL(vq->index);
382                                 break;
383                         }
384                 }
385                 break;
386         case VHOST_USER_SLAVE_IOTLB_MSG:
387                 /* not supported - VIRTIO_F_ACCESS_PLATFORM */
388         case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
389                 /* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
390         default:
391                 vu_err(vu_dev, "unexpected slave request %d\n",
392                        msg.msg.header.request);
393         }
394
395         if (ev && !vu_dev->suspended)
396                 time_travel_add_irq_event(ev);
397
398         if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
399                 vhost_user_reply(vu_dev, &msg.msg, response);
400
401         return IRQ_HANDLED;
402 }
403
404 static irqreturn_t vu_req_interrupt(int irq, void *data)
405 {
406         struct virtio_uml_device *vu_dev = data;
407         irqreturn_t ret = IRQ_HANDLED;
408
409         if (!um_irq_timetravel_handler_used())
410                 ret = vu_req_read_message(vu_dev, NULL);
411
412         if (vu_dev->vq_irq_vq_map) {
413                 struct virtqueue *vq;
414
415                 virtio_device_for_each_vq((&vu_dev->vdev), vq) {
416                         if (vu_dev->vq_irq_vq_map & BIT_ULL(vq->index))
417                                 vring_interrupt(0 /* ignored */, vq);
418                 }
419                 vu_dev->vq_irq_vq_map = 0;
420         } else if (vu_dev->config_changed_irq) {
421                 virtio_config_changed(&vu_dev->vdev);
422                 vu_dev->config_changed_irq = false;
423         }
424
425         return ret;
426 }
427
/*
 * Handler registered alongside vu_req_interrupt() via um_request_irq_tt():
 * reads the pending request and passes @ev on; the result is discarded.
 */
static void vu_req_interrupt_comm_handler(int irq, int fd, void *data,
                                          struct time_travel_event *ev)
{
        struct virtio_uml_device *vu_dev = data;

        vu_req_read_message(vu_dev, ev);
}
433
434 static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
435 {
436         int rc, req_fds[2];
437
438         /* Use a pipe for slave req fd, SIGIO is not supported for eventfd */
439         rc = os_pipe(req_fds, true, true);
440         if (rc < 0)
441                 return rc;
442         vu_dev->req_fd = req_fds[0];
443
444         rc = um_request_irq_tt(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ,
445                                vu_req_interrupt, IRQF_SHARED,
446                                vu_dev->pdev->name, vu_dev,
447                                vu_req_interrupt_comm_handler);
448         if (rc < 0)
449                 goto err_close;
450
451         vu_dev->irq = rc;
452
453         rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
454                                            req_fds[1]);
455         if (rc)
456                 goto err_free_irq;
457
458         goto out;
459
460 err_free_irq:
461         um_free_irq(vu_dev->irq, vu_dev);
462 err_close:
463         os_close_file(req_fds[0]);
464 out:
465         /* Close unused write end of request fds */
466         os_close_file(req_fds[1]);
467         return rc;
468 }
469
470 static int vhost_user_init(struct virtio_uml_device *vu_dev)
471 {
472         int rc = vhost_user_set_owner(vu_dev);
473
474         if (rc)
475                 return rc;
476         rc = vhost_user_get_features(vu_dev, &vu_dev->features);
477         if (rc)
478                 return rc;
479
480         if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
481                 rc = vhost_user_get_protocol_features(vu_dev,
482                                 &vu_dev->protocol_features);
483                 if (rc)
484                         return rc;
485                 vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
486                 rc = vhost_user_set_protocol_features(vu_dev,
487                                 vu_dev->protocol_features);
488                 if (rc)
489                         return rc;
490         }
491
492         if (vu_dev->protocol_features &
493                         BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
494                 rc = vhost_user_init_slave_req(vu_dev);
495                 if (rc)
496                         return rc;
497         }
498
499         return 0;
500 }
501
502 static void vhost_user_get_config(struct virtio_uml_device *vu_dev,
503                                   u32 offset, void *buf, u32 len)
504 {
505         u32 cfg_size = offset + len;
506         struct vhost_user_msg *msg;
507         size_t payload_size = sizeof(msg->payload.config) + cfg_size;
508         size_t msg_size = sizeof(msg->header) + payload_size;
509         int rc;
510
511         if (!(vu_dev->protocol_features &
512               BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
513                 return;
514
515         msg = kzalloc(msg_size, GFP_KERNEL);
516         if (!msg)
517                 return;
518         msg->header.request = VHOST_USER_GET_CONFIG;
519         msg->header.size = payload_size;
520         msg->payload.config.offset = 0;
521         msg->payload.config.size = cfg_size;
522
523         rc = vhost_user_send(vu_dev, true, msg, NULL, 0);
524         if (rc) {
525                 vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n",
526                        rc);
527                 goto free;
528         }
529
530         rc = vhost_user_recv_resp(vu_dev, msg, msg_size);
531         if (rc) {
532                 vu_err(vu_dev,
533                        "receiving VHOST_USER_GET_CONFIG response failed: %d\n",
534                        rc);
535                 goto free;
536         }
537
538         if (msg->header.size != payload_size ||
539             msg->payload.config.size != cfg_size) {
540                 rc = -EPROTO;
541                 vu_err(vu_dev,
542                        "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n",
543                        msg->header.size, payload_size,
544                        msg->payload.config.size, cfg_size);
545                 goto free;
546         }
547         memcpy(buf, msg->payload.config.payload + offset, len);
548
549 free:
550         kfree(msg);
551 }
552
553 static void vhost_user_set_config(struct virtio_uml_device *vu_dev,
554                                   u32 offset, const void *buf, u32 len)
555 {
556         struct vhost_user_msg *msg;
557         size_t payload_size = sizeof(msg->payload.config) + len;
558         size_t msg_size = sizeof(msg->header) + payload_size;
559         int rc;
560
561         if (!(vu_dev->protocol_features &
562               BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
563                 return;
564
565         msg = kzalloc(msg_size, GFP_KERNEL);
566         if (!msg)
567                 return;
568         msg->header.request = VHOST_USER_SET_CONFIG;
569         msg->header.size = payload_size;
570         msg->payload.config.offset = offset;
571         msg->payload.config.size = len;
572         memcpy(msg->payload.config.payload, buf, len);
573
574         rc = vhost_user_send(vu_dev, false, msg, NULL, 0);
575         if (rc)
576                 vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n",
577                        rc);
578
579         kfree(msg);
580 }
581
582 static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
583                                       struct vhost_user_mem_region *region_out)
584 {
585         unsigned long long mem_offset;
586         int rc = phys_mapping(addr, &mem_offset);
587
588         if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc))
589                 return -EFAULT;
590         *fd_out = rc;
591         region_out->guest_addr = addr;
592         region_out->user_addr = addr;
593         region_out->size = size;
594         region_out->mmap_offset = mem_offset;
595
596         /* Ensure mapping is valid for the entire region */
597         rc = phys_mapping(addr + size - 1, &mem_offset);
598         if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n",
599                  addr + size - 1, rc, *fd_out))
600                 return -EFAULT;
601         return 0;
602 }
603
604 static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
605 {
606         struct vhost_user_msg msg = {
607                 .header.request = VHOST_USER_SET_MEM_TABLE,
608                 .header.size = sizeof(msg.payload.mem_regions),
609                 .payload.mem_regions.num = 1,
610         };
611         unsigned long reserved = uml_reserved - uml_physmem;
612         int fds[2];
613         int rc;
614
615         /*
616          * This is a bit tricky, see also the comment with setup_physmem().
617          *
618          * Essentially, setup_physmem() uses a file to mmap() our physmem,
619          * but the code and data we *already* have is omitted. To us, this
620          * is no difference, since they both become part of our address
621          * space and memory consumption. To somebody looking in from the
622          * outside, however, it is different because the part of our memory
623          * consumption that's already part of the binary (code/data) is not
624          * mapped from the file, so it's not visible to another mmap from
625          * the file descriptor.
626          *
627          * Thus, don't advertise this space to the vhost-user slave. This
628          * means that the slave will likely abort or similar when we give
629          * it an address from the hidden range, since it's not marked as
630          * a valid address, but at least that way we detect the issue and
631          * don't just have the slave read an all-zeroes buffer from the
632          * shared memory file, or write something there that we can never
633          * see (depending on the direction of the virtqueue traffic.)
634          *
635          * Since we usually don't want to use .text for virtio buffers,
636          * this effectively means that you cannot use
637          *  1) global variables, which are in the .bss and not in the shm
638          *     file-backed memory
639          *  2) the stack in some processes, depending on where they have
640          *     their stack (or maybe only no interrupt stack?)
641          *
642          * The stack is already not typically valid for DMA, so this isn't
643          * much of a restriction, but global variables might be encountered.
644          *
645          * It might be possible to fix it by copying around the data that's
646          * between bss_start and where we map the file now, but it's not
647          * something that you typically encounter with virtio drivers, so
648          * it didn't seem worthwhile.
649          */
650         rc = vhost_user_init_mem_region(reserved, physmem_size - reserved,
651                                         &fds[0],
652                                         &msg.payload.mem_regions.regions[0]);
653
654         if (rc < 0)
655                 return rc;
656         if (highmem) {
657                 msg.payload.mem_regions.num++;
658                 rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
659                                 &fds[1], &msg.payload.mem_regions.regions[1]);
660                 if (rc < 0)
661                         return rc;
662         }
663
664         return vhost_user_send(vu_dev, false, &msg, fds,
665                                msg.payload.mem_regions.num);
666 }
667
668 static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
669                                       u32 request, u32 index, u32 num)
670 {
671         struct vhost_user_msg msg = {
672                 .header.request = request,
673                 .header.size = sizeof(msg.payload.vring_state),
674                 .payload.vring_state.index = index,
675                 .payload.vring_state.num = num,
676         };
677
678         return vhost_user_send(vu_dev, false, &msg, NULL, 0);
679 }
680
681 static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
682                                     u32 index, u32 num)
683 {
684         return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
685                                           index, num);
686 }
687
688 static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
689                                      u32 index, u32 offset)
690 {
691         return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
692                                           index, offset);
693 }
694
695 static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
696                                      u32 index, u64 desc, u64 used, u64 avail,
697                                      u64 log)
698 {
699         struct vhost_user_msg msg = {
700                 .header.request = VHOST_USER_SET_VRING_ADDR,
701                 .header.size = sizeof(msg.payload.vring_addr),
702                 .payload.vring_addr.index = index,
703                 .payload.vring_addr.desc = desc,
704                 .payload.vring_addr.used = used,
705                 .payload.vring_addr.avail = avail,
706                 .payload.vring_addr.log = log,
707         };
708
709         return vhost_user_send(vu_dev, false, &msg, NULL, 0);
710 }
711
712 static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
713                                    u32 request, int index, int fd)
714 {
715         struct vhost_user_msg msg = {
716                 .header.request = request,
717                 .header.size = sizeof(msg.payload.integer),
718                 .payload.integer = index,
719         };
720
721         if (index & ~VHOST_USER_VRING_INDEX_MASK)
722                 return -EINVAL;
723         if (fd < 0) {
724                 msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
725                 return vhost_user_send(vu_dev, false, &msg, NULL, 0);
726         }
727         return vhost_user_send(vu_dev, false, &msg, &fd, 1);
728 }
729
730 static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
731                                      int index, int fd)
732 {
733         return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
734                                        index, fd);
735 }
736
737 static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
738                                      int index, int fd)
739 {
740         return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
741                                        index, fd);
742 }
743
744 static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
745                                        u32 index, bool enable)
746 {
747         if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
748                 return 0;
749
750         return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
751                                           index, enable);
752 }
753
754
755 /* Virtio interface */
756
757 static bool vu_notify(struct virtqueue *vq)
758 {
759         struct virtio_uml_vq_info *info = vq->priv;
760         const uint64_t n = 1;
761         int rc;
762
763         if (info->suspended)
764                 return true;
765
766         time_travel_propagate_time();
767
768         if (info->kick_fd < 0) {
769                 struct virtio_uml_device *vu_dev;
770
771                 vu_dev = to_virtio_uml_device(vq->vdev);
772
773                 return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK,
774                                                   vq->index, 0) == 0;
775         }
776
777         do {
778                 rc = os_write_file(info->kick_fd, &n, sizeof(n));
779         } while (rc == -EINTR);
780         return !WARN(rc != sizeof(n), "write returned %d\n", rc);
781 }
782
783 static irqreturn_t vu_interrupt(int irq, void *opaque)
784 {
785         struct virtqueue *vq = opaque;
786         struct virtio_uml_vq_info *info = vq->priv;
787         uint64_t n;
788         int rc;
789         irqreturn_t ret = IRQ_NONE;
790
791         do {
792                 rc = os_read_file(info->call_fd, &n, sizeof(n));
793                 if (rc == sizeof(n))
794                         ret |= vring_interrupt(irq, vq);
795         } while (rc == sizeof(n) || rc == -EINTR);
796         WARN(rc != -EAGAIN, "read returned %d\n", rc);
797         return ret;
798 }
799
800
/* Read @len config bytes at @offset into @buf over vhost-user. */
static void vu_get(struct virtio_device *vdev, unsigned offset,
                   void *buf, unsigned len)
{
        vhost_user_get_config(to_virtio_uml_device(vdev), offset, buf, len);
}
808
/* Write @len config bytes from @buf at @offset over vhost-user. */
static void vu_set(struct virtio_device *vdev, unsigned offset,
                   const void *buf, unsigned len)
{
        vhost_user_set_config(to_virtio_uml_device(vdev), offset, buf, len);
}
816
817 static u8 vu_get_status(struct virtio_device *vdev)
818 {
819         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
820
821         return vu_dev->status;
822 }
823
824 static void vu_set_status(struct virtio_device *vdev, u8 status)
825 {
826         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
827
828         vu_dev->status = status;
829 }
830
831 static void vu_reset(struct virtio_device *vdev)
832 {
833         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
834
835         vu_dev->status = 0;
836 }
837
838 static void vu_del_vq(struct virtqueue *vq)
839 {
840         struct virtio_uml_vq_info *info = vq->priv;
841
842         if (info->call_fd >= 0) {
843                 struct virtio_uml_device *vu_dev;
844
845                 vu_dev = to_virtio_uml_device(vq->vdev);
846
847                 um_free_irq(vu_dev->irq, vq);
848                 os_close_file(info->call_fd);
849         }
850
851         if (info->kick_fd >= 0)
852                 os_close_file(info->kick_fd);
853
854         vring_del_virtqueue(vq);
855         kfree(info);
856 }
857
858 static void vu_del_vqs(struct virtio_device *vdev)
859 {
860         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
861         struct virtqueue *vq, *n;
862         u64 features;
863
864         /* Note: reverse order as a workaround to a decoding bug in snabb */
865         list_for_each_entry_reverse(vq, &vdev->vqs, list)
866                 WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));
867
868         /* Ensure previous messages have been processed */
869         WARN_ON(vhost_user_get_features(vu_dev, &features));
870
871         list_for_each_entry_safe(vq, n, &vdev->vqs, list)
872                 vu_del_vq(vq);
873 }
874
875 static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
876                                struct virtqueue *vq)
877 {
878         struct virtio_uml_vq_info *info = vq->priv;
879         int call_fds[2];
880         int rc;
881
882         /* no call FD needed/desired in this case */
883         if (vu_dev->protocol_features &
884                         BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
885             vu_dev->protocol_features &
886                         BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
887                 info->call_fd = -1;
888                 return 0;
889         }
890
891         /* Use a pipe for call fd, since SIGIO is not supported for eventfd */
892         rc = os_pipe(call_fds, true, true);
893         if (rc < 0)
894                 return rc;
895
896         info->call_fd = call_fds[0];
897         rc = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ,
898                             vu_interrupt, IRQF_SHARED, info->name, vq);
899         if (rc < 0)
900                 goto close_both;
901
902         rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
903         if (rc)
904                 goto release_irq;
905
906         goto out;
907
908 release_irq:
909         um_free_irq(vu_dev->irq, vq);
910 close_both:
911         os_close_file(call_fds[0]);
912 out:
913         /* Close (unused) write end of call fds */
914         os_close_file(call_fds[1]);
915
916         return rc;
917 }
918
919 static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
920                                      unsigned index, vq_callback_t *callback,
921                                      const char *name, bool ctx)
922 {
923         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
924         struct platform_device *pdev = vu_dev->pdev;
925         struct virtio_uml_vq_info *info;
926         struct virtqueue *vq;
927         int num = MAX_SUPPORTED_QUEUE_SIZE;
928         int rc;
929
930         info = kzalloc(sizeof(*info), GFP_KERNEL);
931         if (!info) {
932                 rc = -ENOMEM;
933                 goto error_kzalloc;
934         }
935         snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
936                  pdev->id, name);
937
938         vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
939                                     ctx, vu_notify, callback, info->name);
940         if (!vq) {
941                 rc = -ENOMEM;
942                 goto error_create;
943         }
944         vq->priv = info;
945         num = virtqueue_get_vring_size(vq);
946
947         if (vu_dev->protocol_features &
948                         BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) {
949                 info->kick_fd = -1;
950         } else {
951                 rc = os_eventfd(0, 0);
952                 if (rc < 0)
953                         goto error_kick;
954                 info->kick_fd = rc;
955         }
956
957         rc = vu_setup_vq_call_fd(vu_dev, vq);
958         if (rc)
959                 goto error_call;
960
961         rc = vhost_user_set_vring_num(vu_dev, index, num);
962         if (rc)
963                 goto error_setup;
964
965         rc = vhost_user_set_vring_base(vu_dev, index, 0);
966         if (rc)
967                 goto error_setup;
968
969         rc = vhost_user_set_vring_addr(vu_dev, index,
970                                        virtqueue_get_desc_addr(vq),
971                                        virtqueue_get_used_addr(vq),
972                                        virtqueue_get_avail_addr(vq),
973                                        (u64) -1);
974         if (rc)
975                 goto error_setup;
976
977         return vq;
978
979 error_setup:
980         if (info->call_fd >= 0) {
981                 um_free_irq(vu_dev->irq, vq);
982                 os_close_file(info->call_fd);
983         }
984 error_call:
985         if (info->kick_fd >= 0)
986                 os_close_file(info->kick_fd);
987 error_kick:
988         vring_del_virtqueue(vq);
989 error_create:
990         kfree(info);
991 error_kzalloc:
992         return ERR_PTR(rc);
993 }
994
995 static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
996                        struct virtqueue *vqs[], vq_callback_t *callbacks[],
997                        const char * const names[], const bool *ctx,
998                        struct irq_affinity *desc)
999 {
1000         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1001         int i, queue_idx = 0, rc;
1002         struct virtqueue *vq;
1003
1004         /* not supported for now */
1005         if (WARN_ON(nvqs > 64))
1006                 return -EINVAL;
1007
1008         rc = vhost_user_set_mem_table(vu_dev);
1009         if (rc)
1010                 return rc;
1011
1012         for (i = 0; i < nvqs; ++i) {
1013                 if (!names[i]) {
1014                         vqs[i] = NULL;
1015                         continue;
1016                 }
1017
1018                 vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
1019                                      ctx ? ctx[i] : false);
1020                 if (IS_ERR(vqs[i])) {
1021                         rc = PTR_ERR(vqs[i]);
1022                         goto error_setup;
1023                 }
1024         }
1025
1026         list_for_each_entry(vq, &vdev->vqs, list) {
1027                 struct virtio_uml_vq_info *info = vq->priv;
1028
1029                 if (info->kick_fd >= 0) {
1030                         rc = vhost_user_set_vring_kick(vu_dev, vq->index,
1031                                                        info->kick_fd);
1032                         if (rc)
1033                                 goto error_setup;
1034                 }
1035
1036                 rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
1037                 if (rc)
1038                         goto error_setup;
1039         }
1040
1041         return 0;
1042
1043 error_setup:
1044         vu_del_vqs(vdev);
1045         return rc;
1046 }
1047
1048 static u64 vu_get_features(struct virtio_device *vdev)
1049 {
1050         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1051
1052         return vu_dev->features;
1053 }
1054
1055 static int vu_finalize_features(struct virtio_device *vdev)
1056 {
1057         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1058         u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;
1059
1060         vring_transport_features(vdev);
1061         vu_dev->features = vdev->features | supported;
1062
1063         return vhost_user_set_features(vu_dev, vu_dev->features);
1064 }
1065
1066 static const char *vu_bus_name(struct virtio_device *vdev)
1067 {
1068         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1069
1070         return vu_dev->pdev->name;
1071 }
1072
1073 static const struct virtio_config_ops virtio_uml_config_ops = {
1074         .get = vu_get,
1075         .set = vu_set,
1076         .get_status = vu_get_status,
1077         .set_status = vu_set_status,
1078         .reset = vu_reset,
1079         .find_vqs = vu_find_vqs,
1080         .del_vqs = vu_del_vqs,
1081         .get_features = vu_get_features,
1082         .finalize_features = vu_finalize_features,
1083         .bus_name = vu_bus_name,
1084 };
1085
1086 static void virtio_uml_release_dev(struct device *d)
1087 {
1088         struct virtio_device *vdev =
1089                         container_of(d, struct virtio_device, dev);
1090         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1091
1092         /* might not have been opened due to not negotiating the feature */
1093         if (vu_dev->req_fd >= 0) {
1094                 um_free_irq(vu_dev->irq, vu_dev);
1095                 os_close_file(vu_dev->req_fd);
1096         }
1097
1098         os_close_file(vu_dev->sock);
1099         kfree(vu_dev);
1100 }
1101
1102 void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev,
1103                                   bool no_vq_suspend)
1104 {
1105         struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1106
1107         if (WARN_ON(vdev->config != &virtio_uml_config_ops))
1108                 return;
1109
1110         vu_dev->no_vq_suspend = no_vq_suspend;
1111         dev_info(&vdev->dev, "%sabled VQ suspend\n",
1112                  no_vq_suspend ? "dis" : "en");
1113 }
1114
1115 /* Platform device */
1116
1117 static int virtio_uml_probe(struct platform_device *pdev)
1118 {
1119         struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1120         struct virtio_uml_device *vu_dev;
1121         int rc;
1122
1123         if (!pdata)
1124                 return -EINVAL;
1125
1126         vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL);
1127         if (!vu_dev)
1128                 return -ENOMEM;
1129
1130         vu_dev->vdev.dev.parent = &pdev->dev;
1131         vu_dev->vdev.dev.release = virtio_uml_release_dev;
1132         vu_dev->vdev.config = &virtio_uml_config_ops;
1133         vu_dev->vdev.id.device = pdata->virtio_device_id;
1134         vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
1135         vu_dev->pdev = pdev;
1136         vu_dev->req_fd = -1;
1137
1138         do {
1139                 rc = os_connect_socket(pdata->socket_path);
1140         } while (rc == -EINTR);
1141         if (rc < 0)
1142                 return rc;
1143         vu_dev->sock = rc;
1144
1145         spin_lock_init(&vu_dev->sock_lock);
1146
1147         rc = vhost_user_init(vu_dev);
1148         if (rc)
1149                 goto error_init;
1150
1151         platform_set_drvdata(pdev, vu_dev);
1152
1153         device_set_wakeup_capable(&vu_dev->vdev.dev, true);
1154
1155         rc = register_virtio_device(&vu_dev->vdev);
1156         if (rc)
1157                 put_device(&vu_dev->vdev.dev);
1158         vu_dev->registered = 1;
1159         return rc;
1160
1161 error_init:
1162         os_close_file(vu_dev->sock);
1163         return rc;
1164 }
1165
1166 static int virtio_uml_remove(struct platform_device *pdev)
1167 {
1168         struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
1169
1170         unregister_virtio_device(&vu_dev->vdev);
1171         return 0;
1172 }
1173
1174 /* Command line device list */
1175
1176 static void vu_cmdline_release_dev(struct device *d)
1177 {
1178 }
1179
1180 static struct device vu_cmdline_parent = {
1181         .init_name = "virtio-uml-cmdline",
1182         .release = vu_cmdline_release_dev,
1183 };
1184
1185 static bool vu_cmdline_parent_registered;
1186 static int vu_cmdline_id;
1187
1188 static int vu_unregister_cmdline_device(struct device *dev, void *data)
1189 {
1190         struct platform_device *pdev = to_platform_device(dev);
1191         struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1192
1193         kfree(pdata->socket_path);
1194         platform_device_unregister(pdev);
1195         return 0;
1196 }
1197
1198 static void vu_conn_broken(struct work_struct *wk)
1199 {
1200         struct virtio_uml_platform_data *pdata;
1201
1202         pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk);
1203         vu_unregister_cmdline_device(&pdata->pdev->dev, NULL);
1204 }
1205
1206 static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
1207 {
1208         const char *ids = strchr(device, ':');
1209         unsigned int virtio_device_id;
1210         int processed, consumed, err;
1211         char *socket_path;
1212         struct virtio_uml_platform_data pdata, *ppdata;
1213         struct platform_device *pdev;
1214
1215         if (!ids || ids == device)
1216                 return -EINVAL;
1217
1218         processed = sscanf(ids, ":%u%n:%d%n",
1219                            &virtio_device_id, &consumed,
1220                            &vu_cmdline_id, &consumed);
1221
1222         if (processed < 1 || ids[consumed])
1223                 return -EINVAL;
1224
1225         if (!vu_cmdline_parent_registered) {
1226                 err = device_register(&vu_cmdline_parent);
1227                 if (err) {
1228                         pr_err("Failed to register parent device!\n");
1229                         put_device(&vu_cmdline_parent);
1230                         return err;
1231                 }
1232                 vu_cmdline_parent_registered = true;
1233         }
1234
1235         socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
1236         if (!socket_path)
1237                 return -ENOMEM;
1238
1239         pdata.virtio_device_id = (u32) virtio_device_id;
1240         pdata.socket_path = socket_path;
1241
1242         pr_info("Registering device virtio-uml.%d id=%d at %s\n",
1243                 vu_cmdline_id, virtio_device_id, socket_path);
1244
1245         pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
1246                                              vu_cmdline_id++, &pdata,
1247                                              sizeof(pdata));
1248         err = PTR_ERR_OR_ZERO(pdev);
1249         if (err)
1250                 goto free;
1251
1252         ppdata = pdev->dev.platform_data;
1253         ppdata->pdev = pdev;
1254         INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken);
1255
1256         return 0;
1257
1258 free:
1259         kfree(socket_path);
1260         return err;
1261 }
1262
1263 static int vu_cmdline_get_device(struct device *dev, void *data)
1264 {
1265         struct platform_device *pdev = to_platform_device(dev);
1266         struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1267         char *buffer = data;
1268         unsigned int len = strlen(buffer);
1269
1270         snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
1271                  pdata->socket_path, pdata->virtio_device_id, pdev->id);
1272         return 0;
1273 }
1274
1275 static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
1276 {
1277         buffer[0] = '\0';
1278         if (vu_cmdline_parent_registered)
1279                 device_for_each_child(&vu_cmdline_parent, buffer,
1280                                       vu_cmdline_get_device);
1281         return strlen(buffer) + 1;
1282 }
1283
1284 static const struct kernel_param_ops vu_cmdline_param_ops = {
1285         .set = vu_cmdline_set,
1286         .get = vu_cmdline_get,
1287 };
1288
1289 device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
1290 __uml_help(vu_cmdline_param_ops,
1291 "virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n"
1292 "    Configure a virtio device over a vhost-user socket.\n"
1293 "    See virtio_ids.h for a list of possible virtio device id values.\n"
1294 "    Optionally use a specific platform_device id.\n\n"
1295 );
1296
1297
1298 static void vu_unregister_cmdline_devices(void)
1299 {
1300         if (vu_cmdline_parent_registered) {
1301                 device_for_each_child(&vu_cmdline_parent, NULL,
1302                                       vu_unregister_cmdline_device);
1303                 device_unregister(&vu_cmdline_parent);
1304                 vu_cmdline_parent_registered = false;
1305         }
1306 }
1307
1308 /* Platform driver */
1309
1310 static const struct of_device_id virtio_uml_match[] = {
1311         { .compatible = "virtio,uml", },
1312         { }
1313 };
1314 MODULE_DEVICE_TABLE(of, virtio_uml_match);
1315
1316 static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state)
1317 {
1318         struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
1319
1320         if (!vu_dev->no_vq_suspend) {
1321                 struct virtqueue *vq;
1322
1323                 virtio_device_for_each_vq((&vu_dev->vdev), vq) {
1324                         struct virtio_uml_vq_info *info = vq->priv;
1325
1326                         info->suspended = true;
1327                         vhost_user_set_vring_enable(vu_dev, vq->index, false);
1328                 }
1329         }
1330
1331         if (!device_may_wakeup(&vu_dev->vdev.dev)) {
1332                 vu_dev->suspended = true;
1333                 return 0;
1334         }
1335
1336         return irq_set_irq_wake(vu_dev->irq, 1);
1337 }
1338
1339 static int virtio_uml_resume(struct platform_device *pdev)
1340 {
1341         struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
1342
1343         if (!vu_dev->no_vq_suspend) {
1344                 struct virtqueue *vq;
1345
1346                 virtio_device_for_each_vq((&vu_dev->vdev), vq) {
1347                         struct virtio_uml_vq_info *info = vq->priv;
1348
1349                         info->suspended = false;
1350                         vhost_user_set_vring_enable(vu_dev, vq->index, true);
1351                 }
1352         }
1353
1354         vu_dev->suspended = false;
1355
1356         if (!device_may_wakeup(&vu_dev->vdev.dev))
1357                 return 0;
1358
1359         return irq_set_irq_wake(vu_dev->irq, 0);
1360 }
1361
1362 static struct platform_driver virtio_uml_driver = {
1363         .probe = virtio_uml_probe,
1364         .remove = virtio_uml_remove,
1365         .driver = {
1366                 .name = "virtio-uml",
1367                 .of_match_table = virtio_uml_match,
1368         },
1369         .suspend = virtio_uml_suspend,
1370         .resume = virtio_uml_resume,
1371 };
1372
1373 static int __init virtio_uml_init(void)
1374 {
1375         return platform_driver_register(&virtio_uml_driver);
1376 }
1377
1378 static void __exit virtio_uml_exit(void)
1379 {
1380         platform_driver_unregister(&virtio_uml_driver);
1381         vu_unregister_cmdline_devices();
1382 }
1383
1384 module_init(virtio_uml_init);
1385 module_exit(virtio_uml_exit);
1386 __uml_exitcall(virtio_uml_exit);
1387
1388 MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
1389 MODULE_LICENSE("GPL");