// SPDX-License-Identifier: GPL-2.0
/*
 * ACRN_HSM: Handle I/O requests
 *
 * Copyright (C) 2020 Intel Corporation. All rights reserved.
 *
 * Authors:
 *	Jason Chen CJ <jason.cj.chen@intel.com>
 *	Fengwei Yin <fengwei.yin@intel.com>
 */

#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/slab.h>

#include <asm/acrn.h>

#include "acrn_drv.h"

static void ioreq_pause(void);
static void ioreq_resume(void);

static void ioreq_dispatcher(struct work_struct *work);
static struct workqueue_struct *ioreq_wq;
static DECLARE_WORK(ioreq_work, ioreq_dispatcher);
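
/*
 * Dispatch flow: a hypervisor upcall interrupt kicks ioreq_intr_handler(),
 * which queues ioreq_work. ioreq_dispatcher() then walks every VM's shared
 * I/O request buffer and routes each pending request either to an in-kernel
 * client's handler thread or to the default client (the userspace device
 * model) via its waitqueue.
 */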

static inline bool has_pending_request(struct acrn_ioreq_client *client)
{
	return !bitmap_empty(client->ioreqs_map, ACRN_IO_REQUEST_MAX);
}

static inline bool is_destroying(struct acrn_ioreq_client *client)
{
	return test_bit(ACRN_IOREQ_CLIENT_DESTROYING, &client->flags);
}

static int ioreq_complete_request(struct acrn_vm *vm, u16 vcpu,
				  struct acrn_io_request *acrn_req)
{
	bool polling_mode;
	int ret = 0;

	polling_mode = acrn_req->completion_polling;
	/*
	 * The release makes sure all writes to the I/O request are done
	 * before it is marked as ACRN_IOREQ_STATE_COMPLETE.
	 */
	smp_store_release(&acrn_req->processed, ACRN_IOREQ_STATE_COMPLETE);

	/*
	 * To fulfill the requirement of real-time in several industry
	 * scenarios, like automotive, ACRN can run in partition mode, in
	 * which User VMs and the Service VM are bound to dedicated CPU cores.
	 * Polling mode of handling the I/O request is introduced to achieve
	 * faster I/O request handling. In polling mode, the hypervisor polls
	 * for the I/O request's completion. Once an I/O request is marked as
	 * ACRN_IOREQ_STATE_COMPLETE, the hypervisor resumes from the polling
	 * point to continue the I/O request flow. Thus, the completion
	 * notification from the HSM is not needed. Note that
	 * completion_polling must be read before the I/O request is marked
	 * as ACRN_IOREQ_STATE_COMPLETE to avoid racing with the hypervisor.
	 */
	if (!polling_mode) {
		ret = hcall_notify_req_finish(vm->vmid, vcpu);
		if (ret < 0)
			dev_err(acrn_dev.this_device,
				"Failed to notify I/O request completion!\n");
	}

	return ret;
}

static int acrn_ioreq_complete_request(struct acrn_ioreq_client *client,
				       u16 vcpu,
				       struct acrn_io_request *acrn_req)
{
	int ret;

	if (vcpu >= client->vm->vcpu_num)
		return -EINVAL;

	clear_bit(vcpu, client->ioreqs_map);
	if (!acrn_req) {
		acrn_req = (struct acrn_io_request *)client->vm->ioreq_buf;
		acrn_req += vcpu;
	}

	ret = ioreq_complete_request(client->vm, vcpu, acrn_req);

	return ret;
}

int acrn_ioreq_request_default_complete(struct acrn_vm *vm, u16 vcpu)
{
	int ret = 0;

	spin_lock_bh(&vm->ioreq_clients_lock);
	if (vm->default_client)
		ret = acrn_ioreq_complete_request(vm->default_client,
						  vcpu, NULL);
	spin_unlock_bh(&vm->ioreq_clients_lock);

	return ret;
}

/**
 * acrn_ioreq_range_add() - Add an iorange monitored by an ioreq client
 * @client:	The ioreq client
 * @type:	Type (ACRN_IOREQ_TYPE_MMIO or ACRN_IOREQ_TYPE_PORTIO)
 * @start:	Start address of iorange
 * @end:	End address of iorange
 *
 * Return: 0 on success, <0 on error
 */
int acrn_ioreq_range_add(struct acrn_ioreq_client *client,
			 u32 type, u64 start, u64 end)
{
	struct acrn_ioreq_range *range;

	if (end < start) {
		dev_err(acrn_dev.this_device,
			"Invalid IO range [0x%llx,0x%llx]\n", start, end);
		return -EINVAL;
	}

	range = kzalloc(sizeof(*range), GFP_KERNEL);
	if (!range)
		return -ENOMEM;

	range->type = type;
	range->start = start;
	range->end = end;

	write_lock_bh(&client->range_lock);
	list_add(&range->list, &client->range_list);
	write_unlock_bh(&client->range_lock);

	return 0;
}

/**
 * acrn_ioreq_range_del() - Delete an iorange monitored by an ioreq client
 * @client:	The ioreq client
 * @type:	Type (ACRN_IOREQ_TYPE_MMIO or ACRN_IOREQ_TYPE_PORTIO)
 * @start:	Start address of iorange
 * @end:	End address of iorange
 */
void acrn_ioreq_range_del(struct acrn_ioreq_client *client,
			  u32 type, u64 start, u64 end)
{
	struct acrn_ioreq_range *range;

	write_lock_bh(&client->range_lock);
	list_for_each_entry(range, &client->range_list, list) {
		if (type == range->type &&
		    start == range->start &&
		    end == range->end) {
			list_del(&range->list);
			kfree(range);
			break;
		}
	}
	write_unlock_bh(&client->range_lock);
}

/*
 * ioreq_task() is the execution entity of the handler thread of an I/O
 * client. The handler callback of the I/O client is called within the
 * handler thread.
 */
static int ioreq_task(void *data)
{
	struct acrn_ioreq_client *client = data;
	struct acrn_io_request *req;
	unsigned long *ioreqs_map;
	int vcpu, ret;

	/*
	 * Lockless access to ioreqs_map is safe, because
	 * 1) set_bit() and clear_bit() are atomic operations.
	 * 2) I/O requests arrive serialized. The access flow of ioreqs_map is:
	 *	set_bit() - in the ioreq_work handler
	 *	Handler callback handles the corresponding I/O request
	 *	clear_bit() - in the handler thread (including ACRN userspace)
	 *	Mark the corresponding I/O request completed
	 *	Loop again if a new I/O request occurs
	 */
	ioreqs_map = client->ioreqs_map;
	while (!kthread_should_stop()) {
		acrn_ioreq_client_wait(client);
		while (has_pending_request(client)) {
			vcpu = find_first_bit(ioreqs_map, client->vm->vcpu_num);
			req = client->vm->ioreq_buf->req_slot + vcpu;
			ret = client->handler(client, req);
			if (ret < 0) {
				dev_err(acrn_dev.this_device,
					"IO handle failure: %d\n", ret);
				break;
			}
			acrn_ioreq_complete_request(client, vcpu, req);
		}
	}

	return 0;
}

/*
 * For the non-default I/O clients, give them a chance to complete the
 * current I/O requests if there are any. For the default I/O client, it is
 * safe to clear all pending I/O requests because the clearing request comes
 * from ACRN userspace.
 */
void acrn_ioreq_request_clear(struct acrn_vm *vm)
{
	struct acrn_ioreq_client *client;
	bool has_pending = false;
	unsigned long vcpu;
	int retry = 10;

	/*
	 * I/O requests of this VM will be completed directly in
	 * acrn_ioreq_dispatch if the ACRN_VM_FLAG_CLEARING_IOREQ flag is set.
	 */
	set_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags);

	/*
	 * acrn_ioreq_request_clear is only called in the VM reset case.
	 * Simply wait 100ms in total for the I/O requests' completion.
	 */
	do {
		spin_lock_bh(&vm->ioreq_clients_lock);
		list_for_each_entry(client, &vm->ioreq_clients, list) {
			has_pending = has_pending_request(client);
			if (has_pending)
				break;
		}
		spin_unlock_bh(&vm->ioreq_clients_lock);

		if (has_pending)
			schedule_timeout_interruptible(HZ / 100);
	} while (has_pending && --retry > 0);
	if (retry == 0)
		dev_warn(acrn_dev.this_device,
			 "%s cannot flush pending requests!\n", client->name);

	/* Clear all ioreqs belonging to the default client */
	spin_lock_bh(&vm->ioreq_clients_lock);
	client = vm->default_client;
	if (client) {
		vcpu = find_next_bit(client->ioreqs_map,
				     ACRN_IO_REQUEST_MAX, 0);
		while (vcpu < ACRN_IO_REQUEST_MAX) {
			acrn_ioreq_complete_request(client, vcpu, NULL);
			vcpu = find_next_bit(client->ioreqs_map,
					     ACRN_IO_REQUEST_MAX, vcpu + 1);
		}
	}
	spin_unlock_bh(&vm->ioreq_clients_lock);

	/* Clear the ACRN_VM_FLAG_CLEARING_IOREQ flag after the clearing */
	clear_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags);
}

int acrn_ioreq_client_wait(struct acrn_ioreq_client *client)
{
	if (client->is_default) {
		/*
		 * In the default client, a user space thread waits on the
		 * waitqueue. The is_destroying() check is used to notify user
		 * space that the client is going to be destroyed.
		 */
		wait_event_interruptible(client->wq,
					 has_pending_request(client) ||
					 is_destroying(client));
		if (is_destroying(client))
			return -ENODEV;
	} else {
		wait_event_interruptible(client->wq,
					 has_pending_request(client) ||
					 kthread_should_stop());
	}

	return 0;
}

static bool is_cfg_addr(struct acrn_io_request *req)
{
	return ((req->type == ACRN_IOREQ_TYPE_PORTIO) &&
		(req->reqs.pio_request.address == 0xcf8));
}

static bool is_cfg_data(struct acrn_io_request *req)
{
	return ((req->type == ACRN_IOREQ_TYPE_PORTIO) &&
		((req->reqs.pio_request.address >= 0xcfc) &&
		 (req->reqs.pio_request.address < (0xcfc + 4))));
}

/* The low 8 bits of a supported pci_reg address */
#define PCI_LOWREG_MASK  0xFC
/* The high 4 bits of a supported pci_reg address */
#define PCI_HIGHREG_MASK 0xF00
/* Max number of supported functions */
#define PCI_FUNCMAX	7
/* Max number of supported slots */
#define PCI_SLOTMAX	31
/* Max number of supported buses */
#define PCI_BUSMAX	255
#define CONF1_ENABLE	0x80000000UL
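
/*
 * Layout of the CONFIG_ADDRESS value written to port 0xCF8, as decoded
 * below:
 *	bit  31		enable bit (CONF1_ENABLE)
 *	bits 27-24	extended register number (bits 11-8 of pci_reg)
 *	bits 23-16	bus number
 *	bits 15-11	device number
 *	bits 10-8	function number
 *	bits 7-2	register number (dword-aligned)
 */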
/*
 * A PCI configuration space access via PIO 0xCF8 and 0xCFC normally takes
 * the following two steps:
 *   1) write the address into port 0xCF8
 *   2) read/write the data via port 0xCFC
 * This function combines such paired PCI configuration space I/O requests
 * into one ACRN_IOREQ_TYPE_PCICFG type I/O request and continues the
 * processing.
 */
static bool handle_cf8cfc(struct acrn_vm *vm,
			  struct acrn_io_request *req, u16 vcpu)
{
	int offset, pci_cfg_addr, pci_reg;
	bool is_handled = false;

	if (is_cfg_addr(req)) {
		WARN_ON(req->reqs.pio_request.size != 4);
		if (req->reqs.pio_request.direction == ACRN_IOREQ_DIR_WRITE)
			vm->pci_conf_addr = req->reqs.pio_request.value;
		else
			req->reqs.pio_request.value = vm->pci_conf_addr;
		is_handled = true;
	} else if (is_cfg_data(req)) {
		if (!(vm->pci_conf_addr & CONF1_ENABLE)) {
			if (req->reqs.pio_request.direction ==
					ACRN_IOREQ_DIR_READ)
				req->reqs.pio_request.value = 0xffffffff;
			is_handled = true;
		} else {
			offset = req->reqs.pio_request.address - 0xcfc;

			req->type = ACRN_IOREQ_TYPE_PCICFG;
			pci_cfg_addr = vm->pci_conf_addr;
			req->reqs.pci_request.bus =
					(pci_cfg_addr >> 16) & PCI_BUSMAX;
			req->reqs.pci_request.dev =
					(pci_cfg_addr >> 11) & PCI_SLOTMAX;
			req->reqs.pci_request.func =
					(pci_cfg_addr >> 8) & PCI_FUNCMAX;
			pci_reg = (pci_cfg_addr & PCI_LOWREG_MASK) +
				   ((pci_cfg_addr >> 16) & PCI_HIGHREG_MASK);
			req->reqs.pci_request.reg = pci_reg + offset;
		}
	}

	if (is_handled)
		ioreq_complete_request(vm, vcpu, req);

	return is_handled;
}

static bool in_range(struct acrn_ioreq_range *range,
		     struct acrn_io_request *req)
{
	bool ret = false;

	if (range->type == req->type) {
		switch (req->type) {
		case ACRN_IOREQ_TYPE_MMIO:
			if (req->reqs.mmio_request.address >= range->start &&
			    (req->reqs.mmio_request.address +
			     req->reqs.mmio_request.size - 1) <= range->end)
				ret = true;
			break;
		case ACRN_IOREQ_TYPE_PORTIO:
			if (req->reqs.pio_request.address >= range->start &&
			    (req->reqs.pio_request.address +
			     req->reqs.pio_request.size - 1) <= range->end)
				ret = true;
			break;
		default:
			break;
		}
	}

	return ret;
}

static struct acrn_ioreq_client *find_ioreq_client(struct acrn_vm *vm,
						   struct acrn_io_request *req)
{
	struct acrn_ioreq_client *client, *found = NULL;
	struct acrn_ioreq_range *range;

	lockdep_assert_held(&vm->ioreq_clients_lock);

	list_for_each_entry(client, &vm->ioreq_clients, list) {
		read_lock_bh(&client->range_lock);
		list_for_each_entry(range, &client->range_list, list) {
			if (in_range(range, req)) {
				found = client;
				break;
			}
		}
		read_unlock_bh(&client->range_lock);
		if (found)
			break;
	}
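	/* No matching range: fall back to the default client */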
	return found ? found : vm->default_client;
}

/**
 * acrn_ioreq_client_create() - Create an ioreq client
 * @vm:		The VM that this client belongs to
 * @handler:	The ioreq_handler of the ioreq client. acrn_hsm will create a
 *		kernel thread and call the handler to handle I/O requests.
 * @priv:	Private data for the handler
 * @is_default:	If it is the default client
 * @name:	The name of the ioreq client
 *
 * Return: acrn_ioreq_client pointer on success, NULL on error
 */
struct acrn_ioreq_client *acrn_ioreq_client_create(struct acrn_vm *vm,
						   ioreq_handler_t handler,
						   void *priv, bool is_default,
						   const char *name)
{
	struct acrn_ioreq_client *client;

	if (!handler && !is_default) {
		dev_dbg(acrn_dev.this_device,
			"Cannot create non-default client w/o handler!\n");
		return NULL;
	}
	client = kzalloc(sizeof(*client), GFP_KERNEL);
	if (!client)
		return NULL;

	client->handler = handler;
	client->vm = vm;
	client->priv = priv;
	client->is_default = is_default;
	if (name)
		strncpy(client->name, name, sizeof(client->name) - 1);
	rwlock_init(&client->range_lock);
	INIT_LIST_HEAD(&client->range_list);
	init_waitqueue_head(&client->wq);

	if (client->handler) {
		client->thread = kthread_run(ioreq_task, client, "VM%u-%s",
					     client->vm->vmid, client->name);
		if (IS_ERR(client->thread)) {
			kfree(client);
			return NULL;
		}
	}

	spin_lock_bh(&vm->ioreq_clients_lock);
	if (is_default)
		vm->default_client = client;
	else
		list_add(&client->list, &vm->ioreq_clients);
	spin_unlock_bh(&vm->ioreq_clients_lock);

	dev_dbg(acrn_dev.this_device, "Created ioreq client %s.\n", name);
	return client;
}
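
/*
 * A minimal usage sketch for an in-kernel emulation client (hypothetical
 * caller; my_mmio_handler and the MMIO range are illustrative only):
 *
 *	static int my_mmio_handler(struct acrn_ioreq_client *client,
 *				   struct acrn_io_request *req)
 *	{
 *		// Emulate the access described in req->reqs.mmio_request.
 *		// Returning 0 lets ioreq_task() complete the request.
 *		return 0;
 *	}
 *
 *	client = acrn_ioreq_client_create(vm, my_mmio_handler, NULL,
 *					  false, "my-dev");
 *	if (client)
 *		acrn_ioreq_range_add(client, ACRN_IOREQ_TYPE_MMIO,
 *				     0xa0000000, 0xa0000fff);
 *	...
 *	acrn_ioreq_client_destroy(client);
 */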

/**
 * acrn_ioreq_client_destroy() - Destroy an ioreq client
 * @client:	The ioreq client
 */
void acrn_ioreq_client_destroy(struct acrn_ioreq_client *client)
{
	struct acrn_ioreq_range *range, *next;
	struct acrn_vm *vm = client->vm;

	dev_dbg(acrn_dev.this_device,
		"Destroy ioreq client %s.\n", client->name);
	ioreq_pause();
	set_bit(ACRN_IOREQ_CLIENT_DESTROYING, &client->flags);
	if (client->is_default)
		wake_up_interruptible(&client->wq);
	else
		kthread_stop(client->thread);

	spin_lock_bh(&vm->ioreq_clients_lock);
	if (client->is_default)
		vm->default_client = NULL;
	else
		list_del(&client->list);
	spin_unlock_bh(&vm->ioreq_clients_lock);

	write_lock_bh(&client->range_lock);
	list_for_each_entry_safe(range, next, &client->range_list, list) {
		list_del(&range->list);
		kfree(range);
	}
	write_unlock_bh(&client->range_lock);
	kfree(client);

	ioreq_resume();
}

static int acrn_ioreq_dispatch(struct acrn_vm *vm)
{
	struct acrn_ioreq_client *client;
	struct acrn_io_request *req;
	int i;

	for (i = 0; i < vm->vcpu_num; i++) {
		req = vm->ioreq_buf->req_slot + i;

		/*
		 * The acquire makes sure the rest of the I/O request is
		 * read only after ->processed reads as pending.
		 */
		if (smp_load_acquire(&req->processed) ==
				     ACRN_IOREQ_STATE_PENDING) {
			/* Complete the I/O request directly in the clearing stage */
			if (test_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags)) {
				ioreq_complete_request(vm, i, req);
				continue;
			}
			if (handle_cf8cfc(vm, req, i))
				continue;

			spin_lock_bh(&vm->ioreq_clients_lock);
			client = find_ioreq_client(vm, req);
			if (!client) {
				dev_err(acrn_dev.this_device,
					"Failed to find ioreq client!\n");
				spin_unlock_bh(&vm->ioreq_clients_lock);
				return -EINVAL;
			}
			if (!client->is_default)
				req->kernel_handled = 1;
			else
				req->kernel_handled = 0;
			/*
			 * The release makes sure the writes are done before
			 * the request is marked ACRN_IOREQ_STATE_PROCESSING.
			 */
			smp_store_release(&req->processed,
					  ACRN_IOREQ_STATE_PROCESSING);
			set_bit(i, client->ioreqs_map);
			wake_up_interruptible(&client->wq);
			spin_unlock_bh(&vm->ioreq_clients_lock);
		}
	}

	return 0;
}

static void ioreq_dispatcher(struct work_struct *work)
{
	struct acrn_vm *vm;

	read_lock(&acrn_vm_list_lock);
	list_for_each_entry(vm, &acrn_vm_list, list) {
		if (!vm->ioreq_buf)
			break;
		acrn_ioreq_dispatch(vm);
	}
	read_unlock(&acrn_vm_list_lock);
}

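/* Hypervisor upcall: runs in interrupt context, so defer to the workqueue */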
static void ioreq_intr_handler(void)
{
	queue_work(ioreq_wq, &ioreq_work);
}

static void ioreq_pause(void)
{
	/* Flush and unarm the handler to ensure no I/O requests are pending */
	acrn_remove_intr_handler();
	drain_workqueue(ioreq_wq);
}

static void ioreq_resume(void)
{
	/* Queue the work after re-arming, in case an interrupt was missed */
	acrn_setup_intr_handler(ioreq_intr_handler);
	queue_work(ioreq_wq, &ioreq_work);
}

int acrn_ioreq_intr_setup(void)
{
	acrn_setup_intr_handler(ioreq_intr_handler);
	ioreq_wq = alloc_workqueue("ioreq_wq",
				   WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
	if (!ioreq_wq) {
		dev_err(acrn_dev.this_device, "Failed to alloc workqueue!\n");
		acrn_remove_intr_handler();
		return -ENOMEM;
	}
	return 0;
}

void acrn_ioreq_intr_remove(void)
{
	if (ioreq_wq)
		destroy_workqueue(ioreq_wq);
	acrn_remove_intr_handler();
}

int acrn_ioreq_init(struct acrn_vm *vm, u64 buf_vma)
{
	struct acrn_ioreq_buffer *set_buffer;
	struct page *page;
	int ret;

	if (vm->ioreq_buf)
		return -EEXIST;

	set_buffer = kzalloc(sizeof(*set_buffer), GFP_KERNEL);
	if (!set_buffer)
		return -ENOMEM;

	ret = pin_user_pages_fast(buf_vma, 1,
				  FOLL_WRITE | FOLL_LONGTERM, &page);
	if (unlikely(ret != 1) || !page) {
		dev_err(acrn_dev.this_device, "Failed to pin ioreq page!\n");
		ret = -EFAULT;
		goto free_buf;
	}

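	/*
	 * The shared buffer page stays pinned (FOLL_LONGTERM) until
	 * acrn_ioreq_deinit(); the hypervisor writes I/O requests into it
	 * and ioreq_dispatcher() reads them back out.
	 */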
	vm->ioreq_buf = page_address(page);
	vm->ioreq_page = page;
	set_buffer->ioreq_buf = page_to_phys(page);
	ret = hcall_set_ioreq_buffer(vm->vmid, virt_to_phys(set_buffer));
	if (ret < 0) {
		dev_err(acrn_dev.this_device, "Failed to init ioreq buffer!\n");
		unpin_user_page(page);
		vm->ioreq_buf = NULL;
		goto free_buf;
	}

	dev_dbg(acrn_dev.this_device,
		"Init ioreq buffer %pK!\n", vm->ioreq_buf);
	ret = 0;
free_buf:
	kfree(set_buffer);
	return ret;
}

void acrn_ioreq_deinit(struct acrn_vm *vm)
{
	struct acrn_ioreq_client *client, *next;

	dev_dbg(acrn_dev.this_device,
		"Deinit ioreq buffer %pK!\n", vm->ioreq_buf);
	/* Destroy all clients belonging to this VM */
	list_for_each_entry_safe(client, next, &vm->ioreq_clients, list)
		acrn_ioreq_client_destroy(client);
	if (vm->default_client)
		acrn_ioreq_client_destroy(vm->default_client);

	if (vm->ioreq_buf && vm->ioreq_page) {
		unpin_user_page(vm->ioreq_page);
		vm->ioreq_buf = NULL;
	}
}
657 }