1 // SPDX-License-Identifier: GPL-2.0
3 * ACRN_HSM: Handle I/O requests
5 * Copyright (C) 2020 Intel Corporation. All rights reserved.
8 * Jason Chen CJ <jason.cj.chen@intel.com>
9 * Fengwei Yin <fengwei.yin@intel.com>
12 #include <linux/interrupt.h>
14 #include <linux/kthread.h>
16 #include <linux/slab.h>
/* Forward declarations: both helpers are defined further down in this file. */
22 static void ioreq_pause(void);
23 static void ioreq_resume(void);
/*
 * ioreq_work is the work item that runs ioreq_dispatcher(); it is queued
 * onto the ioreq_wq workqueue by the interrupt callback and by ioreq_resume().
 */
25 static void ioreq_dispatcher(struct work_struct *work);
26 static struct workqueue_struct *ioreq_wq;
27 static DECLARE_WORK(ioreq_work, ioreq_dispatcher);
/*
 * Return true when at least one of the first ACRN_IO_REQUEST_MAX bits of
 * client->ioreqs_map is set, i.e. the client has a request to handle.
 */
29 static inline bool has_pending_request(struct acrn_ioreq_client *client)
31 return !bitmap_empty(client->ioreqs_map, ACRN_IO_REQUEST_MAX);
/*
 * Return true when the ACRN_IOREQ_CLIENT_DESTROYING bit is set in
 * client->flags (it is set by acrn_ioreq_client_destroy()).
 */
34 static inline bool is_destroying(struct acrn_ioreq_client *client)
36 return test_bit(ACRN_IOREQ_CLIENT_DESTROYING, &client->flags);
/*
 * Mark @acrn_req as ACRN_IOREQ_STATE_COMPLETE and report the completion for
 * (@vm->vmid, @vcpu) to the hypervisor through hcall_notify_req_finish().
 *
 * NOTE(review): parts of this function are not visible in this excerpt. Going
 * by the explanatory comment in the body, the hypercall is presumably skipped
 * when the request uses completion polling -- confirm against the full file.
 */
39 static int ioreq_complete_request(struct acrn_vm *vm, u16 vcpu,
40 struct acrn_io_request *acrn_req)
/* Sample the polling flag before the request is published as complete */
45 polling_mode = acrn_req->completion_polling;
46 /* Release store: earlier writes to the request are visible before it reads as complete */
47 smp_store_release(&acrn_req->processed, ACRN_IOREQ_STATE_COMPLETE);
50 * To fulfill the requirement of real-time in several industry
51 * scenarios, like automotive, ACRN can run under the partition mode,
52 * in which User VMs and Service VM are bound to dedicated CPU cores.
53 * Polling mode of handling the I/O request is introduced to achieve a
54 * faster I/O request handling. In polling mode, the hypervisor polls
55 * I/O request's completion. Once an I/O request is marked as
56 * ACRN_IOREQ_STATE_COMPLETE, hypervisor resumes from the polling point
57 * to continue the I/O request flow. Thus, the completion notification
58 * from HSM of I/O request is not needed. Please note,
59 * completion_polling needs to be read before the I/O request being
60 * marked as ACRN_IOREQ_STATE_COMPLETE to avoid racing with the
64 ret = hcall_notify_req_finish(vm->vmid, vcpu);
66 dev_err(acrn_dev.this_device,
67 "Notify I/O request finished failed!\n");
/*
 * Client-level completion: range-check @vcpu against client->vm->vcpu_num,
 * clear its bit in client->ioreqs_map and finish the request through
 * ioreq_complete_request().
 *
 * NOTE(review): some lines are not visible here. Callers in this file pass a
 * NULL @acrn_req, so the lookup from client->vm->ioreq_buf below is presumably
 * the fallback for that case -- confirm.
 */
73 static int acrn_ioreq_complete_request(struct acrn_ioreq_client *client,
75 struct acrn_io_request *acrn_req)
79 if (vcpu >= client->vm->vcpu_num)
/* The request is no longer pending for this client */
82 clear_bit(vcpu, client->ioreqs_map);
84 acrn_req = (struct acrn_io_request *)client->vm->ioreq_buf;
88 ret = ioreq_complete_request(client->vm, vcpu, acrn_req);
/*
 * Complete @vcpu's I/O request on behalf of @vm's default client.
 * vm->ioreq_clients_lock is held around the access, and nothing is completed
 * when no default client is registered.
 */
93 int acrn_ioreq_request_default_complete(struct acrn_vm *vm, u16 vcpu)
97 spin_lock_bh(&vm->ioreq_clients_lock);
98 if (vm->default_client)
99 ret = acrn_ioreq_complete_request(vm->default_client,
101 spin_unlock_bh(&vm->ioreq_clients_lock);
107 * acrn_ioreq_range_add() - Add an iorange monitored by an ioreq client
108 * @client: The ioreq client
109 * @type: Type (ACRN_IOREQ_TYPE_MMIO or ACRN_IOREQ_TYPE_PORTIO)
110 * @start: Start address of iorange
111 * @end: End address of iorange
113 * Return: 0 on success, <0 on error
115 int acrn_ioreq_range_add(struct acrn_ioreq_client *client,
116 u32 type, u64 start, u64 end)
118 struct acrn_ioreq_range *range;
/*
 * Rejected range: log both bounds.
 * NOTE(review): the guarding condition and the error return are not visible
 * in this excerpt.
 */
121 dev_err(acrn_dev.this_device,
122 "Invalid IO range [0x%llx,0x%llx]\n", start, end);
/* Zero-initialised list node describing the new range */
126 range = kzalloc(sizeof(*range), GFP_KERNEL);
131 range->start = start;
/* Link the node into the client's range list under the writer lock */
134 write_lock_bh(&client->range_lock);
135 list_add(&range->list, &client->range_list);
136 write_unlock_bh(&client->range_lock);
142 * acrn_ioreq_range_del() - Delete an iorange monitored by an ioreq client
143 * @client: The ioreq client
144 * @type: Type (ACRN_IOREQ_TYPE_MMIO or ACRN_IOREQ_TYPE_PORTIO)
145 * @start: Start address of iorange
146 * @end: End address of iorange
148 void acrn_ioreq_range_del(struct acrn_ioreq_client *client,
149 u32 type, u64 start, u64 end)
151 struct acrn_ioreq_range *range;
/*
 * Search the client's range list under the writer lock and unlink the entry
 * whose type and start match the arguments.
 * NOTE(review): the comparison against @end and the release of the unlinked
 * node are presumably on lines not visible in this excerpt -- confirm.
 */
153 write_lock_bh(&client->range_lock);
154 list_for_each_entry(range, &client->range_list, list) {
155 if (type == range->type &&
156 start == range->start &&
158 list_del(&range->list);
163 write_unlock_bh(&client->range_lock);
167 * ioreq_task() is the execution entity of handler thread of an I/O client.
168 * The handler callback of the I/O client is called within the handler thread.
170 static int ioreq_task(void *data)
172 struct acrn_ioreq_client *client = data;
173 struct acrn_io_request *req;
174 unsigned long *ioreqs_map;
178 * Lockless access to ioreqs_map is safe, because
179 * 1) set_bit() and clear_bit() are atomic operations.
180 * 2) I/O requests arrives serialized. The access flow of ioreqs_map is:
181 * set_bit() - in ioreq_work handler
182 * Handler callback handles corresponding I/O request
183 * clear_bit() - in handler thread (include ACRN userspace)
184 * Mark corresponding I/O request completed
185 * Loop again if a new I/O request occurs
187 ioreqs_map = client->ioreqs_map;
/* Keep serving requests until the thread is asked to stop */
188 while (!kthread_should_stop()) {
189 acrn_ioreq_client_wait(client);
190 while (has_pending_request(client)) {
/*
 * The lowest set bit is the vCPU index and also selects the request slot.
 * NOTE(review): this search is bounded by vcpu_num while
 * has_pending_request() scans ACRN_IO_REQUEST_MAX bits; this assumes no bit
 * at or above vcpu_num is ever set (acrn_ioreq_dispatch() only sets bits
 * below vcpu_num).
 */
191 vcpu = find_first_bit(ioreqs_map, client->vm->vcpu_num);
192 req = client->vm->ioreq_buf->req_slot + vcpu;
193 ret = client->handler(client, req);
195 dev_err(acrn_dev.this_device,
196 "IO handle failure: %d\n", ret);
/* Handler finished: clear the bit and mark the request completed */
199 acrn_ioreq_complete_request(client, vcpu, req);
207 * For the non-default I/O clients, give them a chance to complete the current
208 * I/O requests if there are any. For the default I/O client, it is safe to
209 * clear all pending I/O requests because the clearing request is from ACRN
/*
 * Flush the outstanding I/O requests of @vm: raise
 * ACRN_VM_FLAG_CLEARING_IOREQ, poll the listed clients for pending requests
 * with a bounded number of retries, force-complete whatever is still flagged
 * on the default client, and finally drop the flag again.
 */
212 void acrn_ioreq_request_clear(struct acrn_vm *vm)
214 struct acrn_ioreq_client *client;
215 bool has_pending = false;
220 * IO requests of this VM will be completed directly in
221 * acrn_ioreq_dispatch if ACRN_VM_FLAG_CLEARING_IOREQ flag is set.
223 set_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags);
226 * acrn_ioreq_request_clear is only called in VM reset case. Simply
227 * wait 100ms in total for the IO requests' completion.
230 spin_lock_bh(&vm->ioreq_clients_lock);
231 list_for_each_entry(client, &vm->ioreq_clients, list) {
232 has_pending = has_pending_request(client);
236 spin_unlock_bh(&vm->ioreq_clients_lock);
/* HZ / 100 jiffies: one 10ms slice of the overall wait */
239 schedule_timeout_interruptible(HZ / 100);
240 } while (has_pending && --retry > 0);
/*
 * NOTE(review): client is the cursor left over from the list walk above and
 * is dereferenced here without ioreq_clients_lock -- confirm it always refers
 * to a live client when this warning is reached.
 */
242 dev_warn(acrn_dev.this_device,
243 "%s cannot flush pending request!\n", client->name);
245 /* Clear all ioreqs belonging to the default client */
246 spin_lock_bh(&vm->ioreq_clients_lock);
247 client = vm->default_client;
249 vcpu = find_next_bit(client->ioreqs_map,
250 ACRN_IO_REQUEST_MAX, 0);
251 while (vcpu < ACRN_IO_REQUEST_MAX) {
/* NULL request: the callee works from client->vm->ioreq_buf */
252 acrn_ioreq_complete_request(client, vcpu, NULL);
253 vcpu = find_next_bit(client->ioreqs_map,
254 ACRN_IO_REQUEST_MAX, vcpu + 1);
257 spin_unlock_bh(&vm->ioreq_clients_lock);
259 /* Clear ACRN_VM_FLAG_CLEARING_IOREQ flag after the clearing */
260 clear_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags);
/*
 * Sleep interruptibly on client->wq until the client has a pending request
 * or has to quit: the default-client wait also wakes on is_destroying(), the
 * other wait on kthread_should_stop().
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
263 int acrn_ioreq_client_wait(struct acrn_ioreq_client *client)
265 if (client->is_default) {
267 * In the default client, a user space thread waits on the
268 * waitqueue. The is_destroying() check is used to notify user
269 * space the client is going to be destroyed.
271 wait_event_interruptible(client->wq,
272 has_pending_request(client) ||
273 is_destroying(client));
274 if (is_destroying(client))
277 wait_event_interruptible(client->wq,
278 has_pending_request(client) ||
279 kthread_should_stop());
/* True for a port I/O request that targets port 0xcf8 (PCI config address). */
285 static bool is_cfg_addr(struct acrn_io_request *req)
287 return ((req->type == ACRN_IOREQ_TYPE_PORTIO) &&
288 (req->reqs.pio_request.address == 0xcf8));
/*
 * True for a port I/O request whose address falls in [0xcfc, 0xcfc + 4),
 * i.e. one of the four PCI config data port bytes.
 */
291 static bool is_cfg_data(struct acrn_io_request *req)
293 return ((req->type == ACRN_IOREQ_TYPE_PORTIO) &&
294 ((req->reqs.pio_request.address >= 0xcfc) &&
295 (req->reqs.pio_request.address < (0xcfc + 4))));
298 /* Low 8 bits of the supported PCI register address (bits 1:0 masked off) */
299 #define PCI_LOWREG_MASK 0xFC
300 /* High 4 bits of the supported PCI register address (bits 11:8) */
301 #define PCI_HIGHREG_MASK 0xF00
302 /* Highest supported function number; doubles as the function field mask */
303 #define PCI_FUNCMAX 7
304 /* Highest supported slot number; doubles as the device field mask */
305 #define PCI_SLOTMAX 31
306 /* Highest supported bus number; doubles as the bus field mask */
307 #define PCI_BUSMAX 255
/* Bit 31 of the latched 0xCF8 value; handle_cf8cfc() tests it on data port accesses */
308 #define CONF1_ENABLE 0x80000000UL
310 * A PCI configuration space access via PIO 0xCF8 and 0xCFC normally has two
312 * 1) writes address into 0xCF8 port
313 * 2) accesses data in/from 0xCFC
314 * This function combines such paired PCI configuration space I/O requests into
315 * one ACRN_IOREQ_TYPE_PCICFG type I/O request and continues the processing.
/*
 * @vm:   VM whose latched config address (vm->pci_conf_addr) is used
 * @req:  request to inspect; may be modified in place
 * @vcpu: vCPU index forwarded to ioreq_complete_request()
 *
 * Address port (0xcf8) writes latch the value in vm->pci_conf_addr; the
 * other visible assignment hands the latched value back in the request.
 * Data port accesses with CONF1_ENABLE clear can get 0xffffffff stored as
 * the value; otherwise the request is turned into an ACRN_IOREQ_TYPE_PCICFG
 * request with bus/dev/func/reg decoded from the latched address.
 *
 * NOTE(review): several lines (else branches, the is_handled updates and the
 * return) are not visible in this excerpt -- confirm the exact branch
 * structure against the full file.
 */
317 static bool handle_cf8cfc(struct acrn_vm *vm,
318 struct acrn_io_request *req, u16 vcpu)
320 int offset, pci_cfg_addr, pci_reg;
321 bool is_handled = false;
323 if (is_cfg_addr(req)) {
324 WARN_ON(req->reqs.pio_request.size != 4);
325 if (req->reqs.pio_request.direction == ACRN_IOREQ_DIR_WRITE)
326 vm->pci_conf_addr = req->reqs.pio_request.value;
328 req->reqs.pio_request.value = vm->pci_conf_addr;
330 } else if (is_cfg_data(req)) {
331 if (!(vm->pci_conf_addr & CONF1_ENABLE)) {
332 if (req->reqs.pio_request.direction ==
334 req->reqs.pio_request.value = 0xffffffff;
/* Byte offset (0..3) of the access within the data port window */
337 offset = req->reqs.pio_request.address - 0xcfc;
339 req->type = ACRN_IOREQ_TYPE_PCICFG;
340 pci_cfg_addr = vm->pci_conf_addr;
/* Decode: bus = bits 23:16, dev = bits 15:11, func = bits 10:8 */
341 req->reqs.pci_request.bus =
342 (pci_cfg_addr >> 16) & PCI_BUSMAX;
343 req->reqs.pci_request.dev =
344 (pci_cfg_addr >> 11) & PCI_SLOTMAX;
345 req->reqs.pci_request.func =
346 (pci_cfg_addr >> 8) & PCI_FUNCMAX;
/* reg = address bits 7:2, plus address bits 27:24 moved down to bits 11:8 */
347 pci_reg = (pci_cfg_addr & PCI_LOWREG_MASK) +
348 ((pci_cfg_addr >> 16) & PCI_HIGHREG_MASK);
349 req->reqs.pci_request.reg = pci_reg + offset;
354 ioreq_complete_request(vm, vcpu, req);
/*
 * Check whether the access described by @req, spanning address up to
 * address + size - 1, lies entirely inside [range->start, range->end].
 * Only requests whose type equals range->type are compared, and only the
 * MMIO and port I/O cases are visible here.
 *
 * NOTE(review): address + size - 1 is computed without a wraparound check;
 * confirm the request fields cannot make it overflow.
 */
359 static bool in_range(struct acrn_ioreq_range *range,
360 struct acrn_io_request *req)
364 if (range->type == req->type) {
366 case ACRN_IOREQ_TYPE_MMIO:
367 if (req->reqs.mmio_request.address >= range->start &&
368 (req->reqs.mmio_request.address +
369 req->reqs.mmio_request.size - 1) <= range->end)
372 case ACRN_IOREQ_TYPE_PORTIO:
373 if (req->reqs.pio_request.address >= range->start &&
374 (req->reqs.pio_request.address +
375 req->reqs.pio_request.size - 1) <= range->end)
/*
 * Pick the client that should service @req. The caller must hold
 * vm->ioreq_clients_lock (asserted below). Each listed client's ranges are
 * checked with in_range() under that client's read lock. When no client
 * matched, vm->default_client is returned, which may be NULL.
 */
386 static struct acrn_ioreq_client *find_ioreq_client(struct acrn_vm *vm,
387 struct acrn_io_request *req)
389 struct acrn_ioreq_client *client, *found = NULL;
390 struct acrn_ioreq_range *range;
392 lockdep_assert_held(&vm->ioreq_clients_lock);
394 list_for_each_entry(client, &vm->ioreq_clients, list) {
395 read_lock_bh(&client->range_lock);
396 list_for_each_entry(range, &client->range_list, list) {
397 if (in_range(range, req)) {
402 read_unlock_bh(&client->range_lock);
/* Fall back to the default client when no range claimed the request */
406 return found ? found : vm->default_client;
410 * acrn_ioreq_client_create() - Create an ioreq client
411 * @vm: The VM that this client belongs to
412 * @handler: The ioreq_handler of ioreq client. acrn_hsm will create a kernel
413 * thread and call the handler to handle I/O requests.
414 * @priv: Private data for the handler
415 * @is_default: If it is the default client
416 * @name: The name of ioreq client
418 * Return: acrn_ioreq_client pointer on success, NULL on error
420 struct acrn_ioreq_client *acrn_ioreq_client_create(struct acrn_vm *vm,
421 ioreq_handler_t handler,
422 void *priv, bool is_default,
425 struct acrn_ioreq_client *client;
/* Only the default client may be created without a handler */
427 if (!handler && !is_default) {
428 dev_dbg(acrn_dev.this_device,
429 "Cannot create non-default client w/o handler!\n");
432 client = kzalloc(sizeof(*client), GFP_KERNEL);
436 client->handler = handler;
439 client->is_default = is_default;
/*
 * Copies at most sizeof(client->name) - 1 bytes; the final byte stays NUL
 * because the structure came from kzalloc().
 * NOTE(review): strscpy() would make the termination explicit.
 */
441 strncpy(client->name, name, sizeof(client->name) - 1);
442 rwlock_init(&client->range_lock);
443 INIT_LIST_HEAD(&client->range_list);
444 init_waitqueue_head(&client->wq);
/* Clients with a handler get a kernel thread that runs ioreq_task() */
446 if (client->handler) {
447 client->thread = kthread_run(ioreq_task, client, "VM%u-%s",
448 client->vm->vmid, client->name);
449 if (IS_ERR(client->thread)) {
/* Make the client visible to the VM under ioreq_clients_lock */
455 spin_lock_bh(&vm->ioreq_clients_lock);
457 vm->default_client = client;
459 list_add(&client->list, &vm->ioreq_clients);
460 spin_unlock_bh(&vm->ioreq_clients_lock);
/* NOTE(review): prints the caller's name pointer, not client->name -- confirm name cannot be NULL here */
462 dev_dbg(acrn_dev.this_device, "Created ioreq client %s.\n", name);
467 * acrn_ioreq_client_destroy() - Destroy an ioreq client
468 * @client: The ioreq client
470 void acrn_ioreq_client_destroy(struct acrn_ioreq_client *client)
472 struct acrn_ioreq_range *range, *next;
473 struct acrn_vm *vm = client->vm;
475 dev_dbg(acrn_dev.this_device,
476 "Destroy ioreq client %s.\n", client->name);
/* Flag the teardown first so a waiter woken below observes is_destroying() */
478 set_bit(ACRN_IOREQ_CLIENT_DESTROYING, &client->flags);
479 if (client->is_default)
480 wake_up_interruptible(&client->wq);
/*
 * Stops the handler thread started by acrn_ioreq_client_create().
 * NOTE(review): presumably the non-default branch of the check above; the
 * connecting line is not visible in this excerpt.
 */
482 kthread_stop(client->thread);
/* Detach the client from the VM under ioreq_clients_lock */
484 spin_lock_bh(&vm->ioreq_clients_lock);
485 if (client->is_default)
486 vm->default_client = NULL;
488 list_del(&client->list);
489 spin_unlock_bh(&vm->ioreq_clients_lock);
/* Unlink every range still attached to the client */
491 write_lock_bh(&client->range_lock);
492 list_for_each_entry_safe(range, next, &client->range_list, list) {
493 list_del(&range->list);
496 write_unlock_bh(&client->range_lock);
/*
 * Scan the vm->vcpu_num request slots of @vm's I/O request buffer and route
 * every slot in ACRN_IOREQ_STATE_PENDING: complete it immediately while
 * ACRN_VM_FLAG_CLEARING_IOREQ is set, let handle_cf8cfc() try it, and
 * otherwise hand it to the client chosen by find_ioreq_client().
 *
 * NOTE(review): the return statements and some branch lines are not visible
 * in this excerpt.
 */
502 static int acrn_ioreq_dispatch(struct acrn_vm *vm)
504 struct acrn_ioreq_client *client;
505 struct acrn_io_request *req;
508 for (i = 0; i < vm->vcpu_num; i++) {
509 req = vm->ioreq_buf->req_slot + i;
511 /* barrier the read of processed of acrn_io_request */
512 if (smp_load_acquire(&req->processed) ==
513 ACRN_IOREQ_STATE_PENDING) {
514 /* Complete the IO request directly in clearing stage */
515 if (test_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags)) {
516 ioreq_complete_request(vm, i, req);
519 if (handle_cf8cfc(vm, req, i))
522 spin_lock_bh(&vm->ioreq_clients_lock);
523 client = find_ioreq_client(vm, req);
525 dev_err(acrn_dev.this_device,
526 "Failed to find ioreq client!\n");
527 spin_unlock_bh(&vm->ioreq_clients_lock);
/* kernel_handled: 1 for a non-default (in-kernel handler) client, 0 otherwise */
530 if (!client->is_default)
531 req->kernel_handled = 1;
533 req->kernel_handled = 0;
535 * Add barrier() to make sure the writes are done
536 * before setting ACRN_IOREQ_STATE_PROCESSING
538 smp_store_release(&req->processed,
539 ACRN_IOREQ_STATE_PROCESSING);
/* Flag the slot for the client, then wake whoever sleeps on its queue */
540 set_bit(i, client->ioreqs_map);
541 wake_up_interruptible(&client->wq);
542 spin_unlock_bh(&vm->ioreq_clients_lock);
/*
 * Work handler behind ioreq_work: walk acrn_vm_list under the read side of
 * acrn_vm_list_lock and run acrn_ioreq_dispatch() on the VMs found.
 * @work is not used by the visible code.
 */
549 static void ioreq_dispatcher(struct work_struct *work)
553 read_lock(&acrn_vm_list_lock);
554 list_for_each_entry(vm, &acrn_vm_list, list) {
557 acrn_ioreq_dispatch(vm);
559 read_unlock(&acrn_vm_list_lock);
/*
 * Callback registered through acrn_setup_intr_handler(); it only queues
 * ioreq_work on ioreq_wq, so the actual dispatching happens in the worker.
 */
562 static void ioreq_intr_handler(void)
564 queue_work(ioreq_wq, &ioreq_work);
/*
 * Quiesce dispatching: unhook the interrupt callback so no new work is
 * queued from it, then drain ioreq_wq.
 */
567 static void ioreq_pause(void)
569 /* Flush and unarm the handler to ensure no I/O requests pending */
570 acrn_remove_intr_handler();
571 drain_workqueue(ioreq_wq);
/*
 * Counterpart of ioreq_pause(): reinstall the interrupt callback and queue
 * one dispatch pass right away.
 */
574 static void ioreq_resume(void)
576 /* Schedule after enabling in case other clients miss interrupt */
577 acrn_setup_intr_handler(ioreq_intr_handler);
578 queue_work(ioreq_wq, &ioreq_work);
/*
 * Install ioreq_intr_handler() as the interrupt callback and create the
 * "ioreq_wq" workqueue it queues work on. On the failure path shown below
 * the callback is removed again.
 *
 * NOTE(review): the failure check and the return values are not visible in
 * this excerpt.
 */
581 int acrn_ioreq_intr_setup(void)
583 acrn_setup_intr_handler(ioreq_intr_handler);
/* Last argument is max_active = 1 */
584 ioreq_wq = alloc_workqueue("ioreq_wq",
585 WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
587 dev_err(acrn_dev.this_device, "Failed to alloc workqueue!\n");
588 acrn_remove_intr_handler();
/*
 * Undo acrn_ioreq_intr_setup(): destroy the workqueue and remove the
 * interrupt callback.
 */
594 void acrn_ioreq_intr_remove(void)
597 destroy_workqueue(ioreq_wq);
598 acrn_remove_intr_handler();
/*
 * Set up the I/O request buffer of @vm: pin the single user page at
 * @buf_vma, record its kernel address and struct page in @vm, and pass its
 * physical address to the hypervisor with hcall_set_ioreq_buffer().
 *
 * NOTE(review): the error checks, the return values and the release of
 * set_buffer are not visible in this excerpt.
 */
601 int acrn_ioreq_init(struct acrn_vm *vm, u64 buf_vma)
603 struct acrn_ioreq_buffer *set_buffer;
/* Zeroed argument block for the hypercall below */
610 set_buffer = kzalloc(sizeof(*set_buffer), GFP_KERNEL);
/* Long-term, writable pin of exactly one page */
614 ret = pin_user_pages_fast(buf_vma, 1,
615 FOLL_WRITE | FOLL_LONGTERM, &page);
616 if (unlikely(ret != 1) || !page) {
617 dev_err(acrn_dev.this_device, "Failed to pin ioreq page!\n");
622 vm->ioreq_buf = page_address(page);
623 vm->ioreq_page = page;
624 set_buffer->ioreq_buf = page_to_phys(page);
625 ret = hcall_set_ioreq_buffer(vm->vmid, virt_to_phys(set_buffer));
/* Hypercall failure path: undo the pin and forget the buffer address */
627 dev_err(acrn_dev.this_device, "Failed to init ioreq buffer!\n");
628 unpin_user_page(page);
629 vm->ioreq_buf = NULL;
633 dev_dbg(acrn_dev.this_device,
634 "Init ioreq buffer %pK!\n", vm->ioreq_buf);
/*
 * Release @vm's I/O request resources: destroy every client on
 * vm->ioreq_clients and the default client if present, then unpin the
 * request page.
 */
641 void acrn_ioreq_deinit(struct acrn_vm *vm)
643 struct acrn_ioreq_client *client, *next;
645 dev_dbg(acrn_dev.this_device,
646 "Deinit ioreq buffer %pK!\n", vm->ioreq_buf);
647 /* Destroy all clients belonging to this VM */
648 list_for_each_entry_safe(client, next, &vm->ioreq_clients, list)
649 acrn_ioreq_client_destroy(client);
650 if (vm->default_client)
651 acrn_ioreq_client_destroy(vm->default_client);
/* Unpin only when both the mapping and the page were recorded by acrn_ioreq_init() */
653 if (vm->ioreq_buf && vm->ioreq_page) {
654 unpin_user_page(vm->ioreq_page);
655 vm->ioreq_buf = NULL;