drivers/misc/vmw_vmci/vmci_queue_pair.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * VMware VMCI Driver
   4  *
   5  * Copyright (C) 2012 VMware, Inc. All rights reserved.
   6  */
   7
   8 #include <linux/vmw_vmci_defs.h>
   9 #include <linux/vmw_vmci_api.h>
  10 #include <linux/highmem.h>
  11 #include <linux/kernel.h>
  12 #include <linux/mm.h>
  13 #include <linux/module.h>
  14 #include <linux/mutex.h>
  15 #include <linux/pagemap.h>
  16 #include <linux/pci.h>
  17 #include <linux/sched.h>
  18 #include <linux/slab.h>
  19 #include <linux/uio.h>
  20 #include <linux/wait.h>
  21 #include <linux/vmalloc.h>
  22 #include <linux/skbuff.h>
  23
  24 #include "vmci_handle_array.h"
  25 #include "vmci_queue_pair.h"
  26 #include "vmci_datagram.h"
  27 #include "vmci_resource.h"
  28 #include "vmci_context.h"
  29 #include "vmci_driver.h"
  30 #include "vmci_event.h"
  31 #include "vmci_route.h"
  32
  33 /*
  34  * In the following, we will distinguish between two kinds of VMX processes -
  35  * the ones with versions lower than VMCI_VERSION_NOVMVM that use specialized
  36  * VMCI page files in the VMX and supporting VM to VM communication and the
  37  * newer ones that use the guest memory directly. We will in the following
  38  * refer to the older VMX versions as old-style VMX'en, and the newer ones as
  39  * new-style VMX'en.
  40  *
  41  * The state transition diagram is as follows (the VMCIQPB_ prefix has been
  42  * removed for readability) - see below for more details on the transitions:
  43  *
  44  *            --------------  NEW  -------------
  45  *            |                                |
  46  *           \_/                              \_/
  47  *     CREATED_NO_MEM <-----------------> CREATED_MEM
  48  *            |    |                           |
  49  *            |    o-----------------------o   |
  50  *            |                            |   |
  51  *           \_/                          \_/ \_/
  52  *     ATTACHED_NO_MEM <----------------> ATTACHED_MEM
  53  *            |                            |   |
  54  *            |     o----------------------o   |
  55  *            |     |                          |
  56  *           \_/   \_/                        \_/
  57  *     SHUTDOWN_NO_MEM <----------------> SHUTDOWN_MEM
  58  *            |                                |
  59  *            |                                |
  60  *            -------------> gone <-------------
  61  *
  62  * In more detail. When a VMCI queue pair is first created, it will be in the
  63  * VMCIQPB_NEW state. It will then move into one of the following states:
  64  *
  65  * - VMCIQPB_CREATED_NO_MEM: this state indicates that either:
  66  *
  67  *     - the create was performed by a host endpoint, in which case there is
  68  *       no backing memory yet.
  69  *
  70  *     - the create was initiated by an old-style VMX, that uses
  71  *       vmci_qp_broker_set_page_store to specify the UVAs of the queue pair at
  72  *       a later point in time. This state can be distinguished from the one
  73  *       above by the context ID of the creator. A host side is not allowed to
  74  *       attach until the page store has been set.
  75  *
  76  * - VMCIQPB_CREATED_MEM: this state is the result when the queue pair
  77  *     is created by a VMX using the queue pair device backend that
  78  *     sets the UVAs of the queue pair immediately and stores the
  79  *     information for later attachers. At this point, it is ready for
  80  *     the host side to attach to it.
  81  *
  82  * Once the queue pair is in one of the created states (with the exception of
  83  * the case mentioned for older VMX'en above), it is possible to attach to the
  84  * queue pair. Again we have two new states possible:
  85  *
  86  * - VMCIQPB_ATTACHED_MEM: this state can be reached through the following
  87  *   paths:
  88  *
  89  *     - from VMCIQPB_CREATED_NO_MEM when a new-style VMX allocates a queue
  90  *       pair, and attaches to a queue pair previously created by the host side.
  91  *
  92  *     - from VMCIQPB_CREATED_MEM when the host side attaches to a queue pair
  93  *       already created by a guest.
  94  *
  95  *     - from VMCIQPB_ATTACHED_NO_MEM, when an old-style VMX calls
  96  *       vmci_qp_broker_set_page_store (see below).
  97  *
  98  * - VMCIQPB_ATTACHED_NO_MEM: If the queue pair already was in the
  99  *     VMCIQPB_CREATED_NO_MEM due to a host side create, an old-style VMX will
 100  *     bring the queue pair into this state. Once vmci_qp_broker_set_page_store
 101  *     is called to register the user memory, the VMCIQPB_ATTACHED_MEM state
 102  *     will be entered.
 103  *
 104  * From the attached queue pair, the queue pair can enter the shutdown states
 105  * when either side of the queue pair detaches. If the guest side detaches
 106  * first, the queue pair will enter the VMCIQPB_SHUTDOWN_NO_MEM state, where
 107  * the content of the queue pair will no longer be available. If the host
 108  * side detaches first, the queue pair will either enter the
 109  * VMCIQPB_SHUTDOWN_MEM, if the guest memory is currently mapped, or
 110  * VMCIQPB_SHUTDOWN_NO_MEM, if the guest memory is not mapped
 111  * (e.g., the host detaches while a guest is stunned).
 112  *
 113  * New-style VMX'en will also unmap guest memory, if the guest is
 114  * quiesced, e.g., during a snapshot operation. In that case, the guest
 115  * memory will no longer be available, and the queue pair will transition from
 116  * *_MEM state to a *_NO_MEM state. The VMX may later map the memory once more,
 117  * in which case the queue pair will transition from the *_NO_MEM state at that
 118  * point back to the *_MEM state. Note that the *_NO_MEM state may have changed,
 119  * since the peer may have either attached or detached in the meantime. The
 120  * values are laid out such that ++ on a state will move from a *_NO_MEM to a
 121  * *_MEM state, and -- will move from a *_MEM state back to a *_NO_MEM state.
 122  */
 123
/*
 * The kernel specific component of the struct vmci_queue structure.
 *
 * The allocators in this file place this structure directly behind the
 * struct vmci_queue it belongs to, followed by the per-page arrays that
 * the union members point at.  Which union member is valid is selected
 * by @host: guest queues use u.g, host queues use u.h.
 */
struct vmci_queue_kern_if {
        struct mutex __mutex;   /* Protects the queue. */
        /*
         * Shared by producer and consumer queues.  For host queues both
         * queues of a pair point at the produce queue's __mutex; the
         * allocators leave this NULL.
         */
        struct mutex *mutex;    /* Shared by producer and consumer queues. */
        size_t num_pages;       /* Number of pages incl. header. */
        bool host;              /* Host or guest? */
        union {
                struct {
                        /* DMA address of each page, index 0 is the header. */
                        dma_addr_t *pas;
                        /* Kernel VA of each page, index 0 is the header. */
                        void **vas;
                } g;            /* Used by the guest. */
                struct {
                        /* Data pages; set to &header_page[1]. */
                        struct page **page;
                        /* Start of the page array; entry 0 is the header. */
                        struct page **header_page;
                } h;            /* Used by the host. */
        } u;
};
 141
/*
 * This structure is opaque to the clients.
 *
 * NOTE(review): none of the fields are used in the part of the file
 * reviewed here; the per-field notes below are inferred from the names
 * and types only and should be confirmed against the users.
 */
struct vmci_qp {
        struct vmci_handle handle;      /* Handle of the queue pair. */
        struct vmci_queue *produce_q;   /* Produce queue. */
        struct vmci_queue *consume_q;   /* Consume queue. */
        u64 produce_q_size;     /* presumably size in bytes - TODO confirm */
        u64 consume_q_size;     /* presumably size in bytes - TODO confirm */
        u32 peer;               /* presumably the peer context ID - confirm */
        u32 flags;
        u32 priv_flags;
        bool guest_endpoint;    /* presumably true for a guest endpoint */
        unsigned int blocked;
        unsigned int generation;
        wait_queue_head_t event;        /* Wait queue head. */
};
 159
 160 enum qp_broker_state {
 161         VMCIQPB_NEW,
 162         VMCIQPB_CREATED_NO_MEM,
 163         VMCIQPB_CREATED_MEM,
 164         VMCIQPB_ATTACHED_NO_MEM,
 165         VMCIQPB_ATTACHED_MEM,
 166         VMCIQPB_SHUTDOWN_NO_MEM,
 167         VMCIQPB_SHUTDOWN_MEM,
 168         VMCIQPB_GONE
 169 };
 170
 171 #define QPBROKERSTATE_HAS_MEM(_qpb) (_qpb->state == VMCIQPB_CREATED_MEM || \
 172                                      _qpb->state == VMCIQPB_ATTACHED_MEM || \
 173                                      _qpb->state == VMCIQPB_SHUTDOWN_MEM)
 174
/*
 * In the queue pair broker, we always use the guest point of view for
 * the produce and consume queue values and references, e.g., the
 * produce queue size stored is the guests produce queue size. The
 * host endpoint will need to swap these around. The only exception is
 * the local queue pairs on the host, in which case the host endpoint
 * that creates the queue pair will have the right orientation, and
 * the attaching host endpoint will need to swap.
 *
 * struct qp_entry is the part shared by broker entries and guest
 * endpoints: both embed it as their "qp" member.
 */
struct qp_entry {
        struct list_head list_item;     /* List linkage. */
        struct vmci_handle handle;      /* Handle identifying the queue pair. */
        u32 peer;
        u32 flags;
        u64 produce_size;       /* Produce queue size, excluding the header. */
        u64 consume_size;       /* Consume queue size, excluding the header. */
        u32 ref_count;
};
 193
/*
 * A queue pair as tracked by the queue pair broker.  The entry moves
 * through the qp_broker_state state machine described at the top of this
 * file.
 *
 * NOTE(review): the fields are not used in the part of the file reviewed
 * here; notes marked "presumably" are inferred from names and need to be
 * confirmed against the broker code.
 */
struct qp_broker_entry {
        struct vmci_resource resource;
        struct qp_entry qp;     /* Common queue pair data (guest view). */
        u32 create_id;          /* presumably context ID of the creator */
        u32 attach_id;          /* presumably context ID of the attacher */
        enum qp_broker_state state;     /* Current broker state. */
        bool require_trusted_attach;
        bool created_by_trusted;
        bool vmci_page_files;   /* Created by VMX using VMCI page files */
        struct vmci_queue *produce_q;
        struct vmci_queue *consume_q;
        /* presumably header copies kept while memory is unmapped - confirm */
        struct vmci_queue_header saved_produce_q;
        struct vmci_queue_header saved_consume_q;
        vmci_event_release_cb wakeup_cb;
        void *client_data;      /* presumably the argument for wakeup_cb */
        void *local_mem;        /* Kernel memory for local queue pair */
};
 211
/*
 * Guest side bookkeeping for one queue pair.
 *
 * NOTE(review): the fields are not used in the part of the file reviewed
 * here; the notes below are inferred from the helper functions that
 * produce matching types and should be confirmed against the users.
 */
struct qp_guest_endpoint {
        struct vmci_resource resource;
        struct qp_entry qp;     /* Common queue pair data. */
        u64 num_ppns;           /* presumably total PPN count of both queues */
        void *produce_q;        /* presumably from qp_alloc_queue() - confirm */
        void *consume_q;        /* presumably from qp_alloc_queue() - confirm */
        struct ppn_set ppn_set; /* PPN lists, see qp_alloc_ppn_set(). */
};
 220
/*
 * A list of queue pair entries together with the mutex guarding it.
 */
struct qp_list {
        struct list_head head;  /* Head of the entry list. */
        struct mutex mutex;     /* Protect queue list. */
};
 225
/*
 * The two file-wide queue pair lists.  Both are statically initialized to
 * an empty list with an unlocked mutex, so no runtime setup is required.
 */

/* List used by the queue pair broker. */
static struct qp_list qp_broker_list = {
        .head = LIST_HEAD_INIT(qp_broker_list.head),
        .mutex = __MUTEX_INITIALIZER(qp_broker_list.mutex),
};

/* List of guest endpoints. */
static struct qp_list qp_guest_endpoints = {
        .head = LIST_HEAD_INIT(qp_guest_endpoints.head),
        .mutex = __MUTEX_INITIALIZER(qp_guest_endpoints.mutex),
};
 235
 236 #define INVALID_VMCI_GUEST_MEM_ID  0
 237 #define QPE_NUM_PAGES(_QPE) ((u32) \
 238                              (DIV_ROUND_UP(_QPE.produce_size, PAGE_SIZE) + \
 239                               DIV_ROUND_UP(_QPE.consume_size, PAGE_SIZE) + 2))
 240 #define QP_SIZES_ARE_VALID(_prod_qsize, _cons_qsize) \
 241         ((_prod_qsize) + (_cons_qsize) >= max(_prod_qsize, _cons_qsize) && \
 242          (_prod_qsize) + (_cons_qsize) <= VMCI_MAX_GUEST_QP_MEMORY)
 243
 244 /*
 245  * Frees kernel VA space for a given queue and its queue header, and
 246  * frees physical data pages.
 247  */
 248 static void qp_free_queue(void *q, u64 size)
 249 {
 250         struct vmci_queue *queue = q;
 251
 252         if (queue) {
 253                 u64 i;
 254
 255                 /* Given size does not include header, so add in a page here. */
 256                 for (i = 0; i < DIV_ROUND_UP(size, PAGE_SIZE) + 1; i++) {
 257                         dma_free_coherent(&vmci_pdev->dev, PAGE_SIZE,
 258                                           queue->kernel_if->u.g.vas[i],
 259                                           queue->kernel_if->u.g.pas[i]);
 260                 }
 261
 262                 vfree(queue);
 263         }
 264 }
 265
 266 /*
 267  * Allocates kernel queue pages of specified size with IOMMU mappings,
 268  * plus space for the queue structure/kernel interface and the queue
 269  * header.
 270  */
 271 static void *qp_alloc_queue(u64 size, u32 flags)
 272 {
 273         u64 i;
 274         struct vmci_queue *queue;
 275         size_t pas_size;
 276         size_t vas_size;
 277         size_t queue_size = sizeof(*queue) + sizeof(*queue->kernel_if);
 278         u64 num_pages;
 279
 280         if (size > SIZE_MAX - PAGE_SIZE)
 281                 return NULL;
 282         num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
 283         if (num_pages >
 284                  (SIZE_MAX - queue_size) /
 285                  (sizeof(*queue->kernel_if->u.g.pas) +
 286                   sizeof(*queue->kernel_if->u.g.vas)))
 287                 return NULL;
 288
 289         pas_size = num_pages * sizeof(*queue->kernel_if->u.g.pas);
 290         vas_size = num_pages * sizeof(*queue->kernel_if->u.g.vas);
 291         queue_size += pas_size + vas_size;
 292
 293         queue = vmalloc(queue_size);
 294         if (!queue)
 295                 return NULL;
 296
 297         queue->q_header = NULL;
 298         queue->saved_header = NULL;
 299         queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1);
 300         queue->kernel_if->mutex = NULL;
 301         queue->kernel_if->num_pages = num_pages;
 302         queue->kernel_if->u.g.pas = (dma_addr_t *)(queue->kernel_if + 1);
 303         queue->kernel_if->u.g.vas =
 304                 (void **)((u8 *)queue->kernel_if->u.g.pas + pas_size);
 305         queue->kernel_if->host = false;
 306
 307         for (i = 0; i < num_pages; i++) {
 308                 queue->kernel_if->u.g.vas[i] =
 309                         dma_alloc_coherent(&vmci_pdev->dev, PAGE_SIZE,
 310                                            &queue->kernel_if->u.g.pas[i],
 311                                            GFP_KERNEL);
 312                 if (!queue->kernel_if->u.g.vas[i]) {
 313                         /* Size excl. the header. */
 314                         qp_free_queue(queue, i * PAGE_SIZE);
 315                         return NULL;
 316                 }
 317         }
 318
 319         /* Queue header is the first page. */
 320         queue->q_header = queue->kernel_if->u.g.vas[0];
 321
 322         return queue;
 323 }
 324
 325 /*
 326  * Copies from a given buffer or iovector to a VMCI Queue.  Uses
 327  * kmap()/kunmap() to dynamically map/unmap required portions of the queue
 328  * by traversing the offset -> page translation structure for the queue.
 329  * Assumes that offset + size does not wrap around in the queue.
 330  */
 331 static int qp_memcpy_to_queue_iter(struct vmci_queue *queue,
 332                                   u64 queue_offset,
 333                                   struct iov_iter *from,
 334                                   size_t size)
 335 {
 336         struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
 337         size_t bytes_copied = 0;
 338
 339         while (bytes_copied < size) {
 340                 const u64 page_index =
 341                         (queue_offset + bytes_copied) / PAGE_SIZE;
 342                 const size_t page_offset =
 343                     (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
 344                 void *va;
 345                 size_t to_copy;
 346
 347                 if (kernel_if->host)
 348                         va = kmap(kernel_if->u.h.page[page_index]);
 349                 else
 350                         va = kernel_if->u.g.vas[page_index + 1];
 351                         /* Skip header. */
 352
 353                 if (size - bytes_copied > PAGE_SIZE - page_offset)
 354                         /* Enough payload to fill up from this page. */
 355                         to_copy = PAGE_SIZE - page_offset;
 356                 else
 357                         to_copy = size - bytes_copied;
 358
 359                 if (!copy_from_iter_full((u8 *)va + page_offset, to_copy,
 360                                          from)) {
 361                         if (kernel_if->host)
 362                                 kunmap(kernel_if->u.h.page[page_index]);
 363                         return VMCI_ERROR_INVALID_ARGS;
 364                 }
 365                 bytes_copied += to_copy;
 366                 if (kernel_if->host)
 367                         kunmap(kernel_if->u.h.page[page_index]);
 368         }
 369
 370         return VMCI_SUCCESS;
 371 }
 372
 373 /*
 374  * Copies to a given buffer or iovector from a VMCI Queue.  Uses
 375  * kmap()/kunmap() to dynamically map/unmap required portions of the queue
 376  * by traversing the offset -> page translation structure for the queue.
 377  * Assumes that offset + size does not wrap around in the queue.
 378  */
 379 static int qp_memcpy_from_queue_iter(struct iov_iter *to,
 380                                     const struct vmci_queue *queue,
 381                                     u64 queue_offset, size_t size)
 382 {
 383         struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
 384         size_t bytes_copied = 0;
 385
 386         while (bytes_copied < size) {
 387                 const u64 page_index =
 388                         (queue_offset + bytes_copied) / PAGE_SIZE;
 389                 const size_t page_offset =
 390                     (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
 391                 void *va;
 392                 size_t to_copy;
 393                 int err;
 394
 395                 if (kernel_if->host)
 396                         va = kmap(kernel_if->u.h.page[page_index]);
 397                 else
 398                         va = kernel_if->u.g.vas[page_index + 1];
 399                         /* Skip header. */
 400
 401                 if (size - bytes_copied > PAGE_SIZE - page_offset)
 402                         /* Enough payload to fill up this page. */
 403                         to_copy = PAGE_SIZE - page_offset;
 404                 else
 405                         to_copy = size - bytes_copied;
 406
 407                 err = copy_to_iter((u8 *)va + page_offset, to_copy, to);
 408                 if (err != to_copy) {
 409                         if (kernel_if->host)
 410                                 kunmap(kernel_if->u.h.page[page_index]);
 411                         return VMCI_ERROR_INVALID_ARGS;
 412                 }
 413                 bytes_copied += to_copy;
 414                 if (kernel_if->host)
 415                         kunmap(kernel_if->u.h.page[page_index]);
 416         }
 417
 418         return VMCI_SUCCESS;
 419 }
 420
 421 /*
 422  * Allocates two list of PPNs --- one for the pages in the produce queue,
 423  * and the other for the pages in the consume queue. Intializes the list
 424  * of PPNs with the page frame numbers of the KVA for the two queues (and
 425  * the queue headers).
 426  */
 427 static int qp_alloc_ppn_set(void *prod_q,
 428                             u64 num_produce_pages,
 429                             void *cons_q,
 430                             u64 num_consume_pages, struct ppn_set *ppn_set)
 431 {
 432         u64 *produce_ppns;
 433         u64 *consume_ppns;
 434         struct vmci_queue *produce_q = prod_q;
 435         struct vmci_queue *consume_q = cons_q;
 436         u64 i;
 437
 438         if (!produce_q || !num_produce_pages || !consume_q ||
 439             !num_consume_pages || !ppn_set)
 440                 return VMCI_ERROR_INVALID_ARGS;
 441
 442         if (ppn_set->initialized)
 443                 return VMCI_ERROR_ALREADY_EXISTS;
 444
 445         produce_ppns =
 446             kmalloc_array(num_produce_pages, sizeof(*produce_ppns),
 447                           GFP_KERNEL);
 448         if (!produce_ppns)
 449                 return VMCI_ERROR_NO_MEM;
 450
 451         consume_ppns =
 452             kmalloc_array(num_consume_pages, sizeof(*consume_ppns),
 453                           GFP_KERNEL);
 454         if (!consume_ppns) {
 455                 kfree(produce_ppns);
 456                 return VMCI_ERROR_NO_MEM;
 457         }
 458
 459         for (i = 0; i < num_produce_pages; i++)
 460                 produce_ppns[i] =
 461                         produce_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT;
 462
 463         for (i = 0; i < num_consume_pages; i++)
 464                 consume_ppns[i] =
 465                         consume_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT;
 466
 467         ppn_set->num_produce_pages = num_produce_pages;
 468         ppn_set->num_consume_pages = num_consume_pages;
 469         ppn_set->produce_ppns = produce_ppns;
 470         ppn_set->consume_ppns = consume_ppns;
 471         ppn_set->initialized = true;
 472         return VMCI_SUCCESS;
 473 }
 474
 475 /*
 476  * Frees the two list of PPNs for a queue pair.
 477  */
 478 static void qp_free_ppn_set(struct ppn_set *ppn_set)
 479 {
 480         if (ppn_set->initialized) {
 481                 /* Do not call these functions on NULL inputs. */
 482                 kfree(ppn_set->produce_ppns);
 483                 kfree(ppn_set->consume_ppns);
 484         }
 485         memset(ppn_set, 0, sizeof(*ppn_set));
 486 }
 487
 488 /*
 489  * Populates the list of PPNs in the hypercall structure with the PPNS
 490  * of the produce queue and the consume queue.
 491  */
 492 static int qp_populate_ppn_set(u8 *call_buf, const struct ppn_set *ppn_set)
 493 {
 494         if (vmci_use_ppn64()) {
 495                 memcpy(call_buf, ppn_set->produce_ppns,
 496                        ppn_set->num_produce_pages *
 497                        sizeof(*ppn_set->produce_ppns));
 498                 memcpy(call_buf +
 499                        ppn_set->num_produce_pages *
 500                        sizeof(*ppn_set->produce_ppns),
 501                        ppn_set->consume_ppns,
 502                        ppn_set->num_consume_pages *
 503                        sizeof(*ppn_set->consume_ppns));
 504         } else {
 505                 int i;
 506                 u32 *ppns = (u32 *) call_buf;
 507
 508                 for (i = 0; i < ppn_set->num_produce_pages; i++)
 509                         ppns[i] = (u32) ppn_set->produce_ppns[i];
 510
 511                 ppns = &ppns[ppn_set->num_produce_pages];
 512
 513                 for (i = 0; i < ppn_set->num_consume_pages; i++)
 514                         ppns[i] = (u32) ppn_set->consume_ppns[i];
 515         }
 516
 517         return VMCI_SUCCESS;
 518 }
 519
 520 /*
 521  * Allocates kernel VA space of specified size plus space for the queue
 522  * and kernel interface.  This is different from the guest queue allocator,
 523  * because we do not allocate our own queue header/data pages here but
 524  * share those of the guest.
 525  */
 526 static struct vmci_queue *qp_host_alloc_queue(u64 size)
 527 {
 528         struct vmci_queue *queue;
 529         size_t queue_page_size;
 530         u64 num_pages;
 531         const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if));
 532
 533         if (size > min_t(size_t, VMCI_MAX_GUEST_QP_MEMORY, SIZE_MAX - PAGE_SIZE))
 534                 return NULL;
 535         num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
 536         if (num_pages > (SIZE_MAX - queue_size) /
 537                  sizeof(*queue->kernel_if->u.h.page))
 538                 return NULL;
 539
 540         queue_page_size = num_pages * sizeof(*queue->kernel_if->u.h.page);
 541
 542         if (queue_size + queue_page_size > KMALLOC_MAX_SIZE)
 543                 return NULL;
 544
 545         queue = kzalloc(queue_size + queue_page_size, GFP_KERNEL);
 546         if (queue) {
 547                 queue->q_header = NULL;
 548                 queue->saved_header = NULL;
 549                 queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1);
 550                 queue->kernel_if->host = true;
 551                 queue->kernel_if->mutex = NULL;
 552                 queue->kernel_if->num_pages = num_pages;
 553                 queue->kernel_if->u.h.header_page =
 554                     (struct page **)((u8 *)queue + queue_size);
 555                 queue->kernel_if->u.h.page =
 556                         &queue->kernel_if->u.h.header_page[1];
 557         }
 558
 559         return queue;
 560 }
 561
/*
 * Frees kernel memory for a given queue (header plus translation
 * structure).
 *
 * qp_host_alloc_queue() obtains the queue, its kernel interface and the
 * page pointer array with one allocation, so a single kfree() releases
 * all of it.  queue_size is not used by this implementation.
 */
static void qp_host_free_queue(struct vmci_queue *queue, u64 queue_size)
{
        kfree(queue);
}
 570
 571 /*
 572  * Initialize the mutex for the pair of queues.  This mutex is used to
 573  * protect the q_header and the buffer from changing out from under any
 574  * users of either queue.  Of course, it's only any good if the mutexes
 575  * are actually acquired.  Queue structure must lie on non-paged memory
 576  * or we cannot guarantee access to the mutex.
 577  */
 578 static void qp_init_queue_mutex(struct vmci_queue *produce_q,
 579                                 struct vmci_queue *consume_q)
 580 {
 581         /*
 582          * Only the host queue has shared state - the guest queues do not
 583          * need to synchronize access using a queue mutex.
 584          */
 585
 586         if (produce_q->kernel_if->host) {
 587                 produce_q->kernel_if->mutex = &produce_q->kernel_if->__mutex;
 588                 consume_q->kernel_if->mutex = &produce_q->kernel_if->__mutex;
 589                 mutex_init(produce_q->kernel_if->mutex);
 590         }
 591 }
 592
 593 /*
 594  * Cleans up the mutex for the pair of queues.
 595  */
 596 static void qp_cleanup_queue_mutex(struct vmci_queue *produce_q,
 597                                    struct vmci_queue *consume_q)
 598 {
 599         if (produce_q->kernel_if->host) {
 600                 produce_q->kernel_if->mutex = NULL;
 601                 consume_q->kernel_if->mutex = NULL;
 602         }
 603 }
 604
 605 /*
 606  * Acquire the mutex for the queue.  Note that the produce_q and
 607  * the consume_q share a mutex.  So, only one of the two need to
 608  * be passed in to this routine.  Either will work just fine.
 609  */
 610 static void qp_acquire_queue_mutex(struct vmci_queue *queue)
 611 {
 612         if (queue->kernel_if->host)
 613                 mutex_lock(queue->kernel_if->mutex);
 614 }
 615
 616 /*
 617  * Release the mutex for the queue.  Note that the produce_q and
 618  * the consume_q share a mutex.  So, only one of the two need to
 619  * be passed in to this routine.  Either will work just fine.
 620  */
 621 static void qp_release_queue_mutex(struct vmci_queue *queue)
 622 {
 623         if (queue->kernel_if->host)
 624                 mutex_unlock(queue->kernel_if->mutex);
 625 }
 626
 627 /*
 628  * Helper function to release pages in the PageStoreAttachInfo
 629  * previously obtained using get_user_pages.
 630  */
 631 static void qp_release_pages(struct page **pages,
 632                              u64 num_pages, bool dirty)
 633 {
 634         int i;
 635
 636         for (i = 0; i < num_pages; i++) {
 637                 if (dirty)
 638                         set_page_dirty_lock(pages[i]);
 639
 640                 put_page(pages[i]);
 641                 pages[i] = NULL;
 642         }
 643 }
 644
 645 /*
 646  * Lock the user pages referenced by the {produce,consume}Buffer
 647  * struct into memory and populate the {produce,consume}Pages
 648  * arrays in the attach structure with them.
 649  */
 650 static int qp_host_get_user_memory(u64 produce_uva,
 651                                    u64 consume_uva,
 652                                    struct vmci_queue *produce_q,
 653                                    struct vmci_queue *consume_q)
 654 {
 655         int retval;
 656         int err = VMCI_SUCCESS;
 657
 658         retval = get_user_pages_fast((uintptr_t) produce_uva,
 659                                      produce_q->kernel_if->num_pages,
 660                                      FOLL_WRITE,
 661                                      produce_q->kernel_if->u.h.header_page);
 662         if (retval < (int)produce_q->kernel_if->num_pages) {
 663                 pr_debug("get_user_pages_fast(produce) failed (retval=%d)",
 664                         retval);
 665                 if (retval > 0)
 666                         qp_release_pages(produce_q->kernel_if->u.h.header_page,
 667                                         retval, false);
 668                 err = VMCI_ERROR_NO_MEM;
 669                 goto out;
 670         }
 671
 672         retval = get_user_pages_fast((uintptr_t) consume_uva,
 673                                      consume_q->kernel_if->num_pages,
 674                                      FOLL_WRITE,
 675                                      consume_q->kernel_if->u.h.header_page);
 676         if (retval < (int)consume_q->kernel_if->num_pages) {
 677                 pr_debug("get_user_pages_fast(consume) failed (retval=%d)",
 678                         retval);
 679                 if (retval > 0)
 680                         qp_release_pages(consume_q->kernel_if->u.h.header_page,
 681                                         retval, false);
 682                 qp_release_pages(produce_q->kernel_if->u.h.header_page,
 683                                  produce_q->kernel_if->num_pages, false);
 684                 err = VMCI_ERROR_NO_MEM;
 685         }
 686
 687  out:
 688         return err;
 689 }
 690
 691 /*
 692  * Registers the specification of the user pages used for backing a queue
 693  * pair. Enough information to map in pages is stored in the OS specific
 694  * part of the struct vmci_queue structure.
 695  */
 696 static int qp_host_register_user_memory(struct vmci_qp_page_store *page_store,
 697                                         struct vmci_queue *produce_q,
 698                                         struct vmci_queue *consume_q)
 699 {
 700         u64 produce_uva;
 701         u64 consume_uva;
 702
 703         /*
 704          * The new style and the old style mapping only differs in
 705          * that we either get a single or two UVAs, so we split the
 706          * single UVA range at the appropriate spot.
 707          */
 708         produce_uva = page_store->pages;
 709         consume_uva = page_store->pages +
 710             produce_q->kernel_if->num_pages * PAGE_SIZE;
 711         return qp_host_get_user_memory(produce_uva, consume_uva, produce_q,
 712                                        consume_q);
 713 }
 714
 715 /*
 716  * Releases and removes the references to user pages stored in the attach
 717  * struct.  Pages are released from the page cache and may become
 718  * swappable again.
 719  */
 720 static void qp_host_unregister_user_memory(struct vmci_queue *produce_q,
 721                                            struct vmci_queue *consume_q)
 722 {
 723         qp_release_pages(produce_q->kernel_if->u.h.header_page,
 724                          produce_q->kernel_if->num_pages, true);
 725         memset(produce_q->kernel_if->u.h.header_page, 0,
 726                sizeof(*produce_q->kernel_if->u.h.header_page) *
 727                produce_q->kernel_if->num_pages);
 728         qp_release_pages(consume_q->kernel_if->u.h.header_page,
 729                          consume_q->kernel_if->num_pages, true);
 730         memset(consume_q->kernel_if->u.h.header_page, 0,
 731                sizeof(*consume_q->kernel_if->u.h.header_page) *
 732                consume_q->kernel_if->num_pages);
 733 }
 734
 735 /*
 736  * Once qp_host_register_user_memory has been performed on a
 737  * queue, the queue pair headers can be mapped into the
 738  * kernel. Once mapped, they must be unmapped with
 739  * qp_host_unmap_queues prior to calling
 740  * qp_host_unregister_user_memory.
 741  * Pages are pinned.
 742  */
 743 static int qp_host_map_queues(struct vmci_queue *produce_q,
 744                               struct vmci_queue *consume_q)
 745 {
 746         int result;
 747
 748         if (!produce_q->q_header || !consume_q->q_header) {
 749                 struct page *headers[2];
 750
 751                 if (produce_q->q_header != consume_q->q_header)
 752                         return VMCI_ERROR_QUEUEPAIR_MISMATCH;
 753
 754                 if (produce_q->kernel_if->u.h.header_page == NULL ||
 755                     *produce_q->kernel_if->u.h.header_page == NULL)
 756                         return VMCI_ERROR_UNAVAILABLE;
 757
 758                 headers[0] = *produce_q->kernel_if->u.h.header_page;
 759                 headers[1] = *consume_q->kernel_if->u.h.header_page;
 760
 761                 produce_q->q_header = vmap(headers, 2, VM_MAP, PAGE_KERNEL);
 762                 if (produce_q->q_header != NULL) {
 763                         consume_q->q_header =
 764                             (struct vmci_queue_header *)((u8 *)
 765                                                          produce_q->q_header +
 766                                                          PAGE_SIZE);
 767                         result = VMCI_SUCCESS;
 768                 } else {
 769                         pr_warn("vmap failed\n");
 770                         result = VMCI_ERROR_NO_MEM;
 771                 }
 772         } else {
 773                 result = VMCI_SUCCESS;
 774         }
 775
 776         return result;
 777 }
 778
 779 /*
 780  * Unmaps previously mapped queue pair headers from the kernel.
 781  * Pages are unpinned.
 782  */
 783 static int qp_host_unmap_queues(u32 gid,
 784                                 struct vmci_queue *produce_q,
 785                                 struct vmci_queue *consume_q)
 786 {
 787         if (produce_q->q_header) {
 788                 if (produce_q->q_header < consume_q->q_header)
 789                         vunmap(produce_q->q_header);
 790                 else
 791                         vunmap(consume_q->q_header);
 792
 793                 produce_q->q_header = NULL;
 794                 consume_q->q_header = NULL;
 795         }
 796
 797         return VMCI_SUCCESS;
 798 }
 799
 800 /*
 801  * Finds the entry in the list corresponding to a given handle. Assumes
 802  * that the list is locked.
 803  */
 804 static struct qp_entry *qp_list_find(struct qp_list *qp_list,
 805                                      struct vmci_handle handle)
 806 {
 807         struct qp_entry *entry;
 808
 809         if (vmci_handle_is_invalid(handle))
 810                 return NULL;
 811
 812         list_for_each_entry(entry, &qp_list->head, list_item) {
 813                 if (vmci_handle_is_equal(entry->handle, handle))
 814                         return entry;
 815         }
 816
 817         return NULL;
 818 }
 819
 820 /*
 821  * Finds the entry in the list corresponding to a given handle.
 822  */
 823 static struct qp_guest_endpoint *
 824 qp_guest_handle_to_entry(struct vmci_handle handle)
 825 {
 826         struct qp_guest_endpoint *entry;
 827         struct qp_entry *qp = qp_list_find(&qp_guest_endpoints, handle);
 828
 829         entry = qp ? container_of(
 830                 qp, struct qp_guest_endpoint, qp) : NULL;
 831         return entry;
 832 }
 833
 834 /*
 835  * Finds the entry in the list corresponding to a given handle.
 836  */
 837 static struct qp_broker_entry *
 838 qp_broker_handle_to_entry(struct vmci_handle handle)
 839 {
 840         struct qp_broker_entry *entry;
 841         struct qp_entry *qp = qp_list_find(&qp_broker_list, handle);
 842
 843         entry = qp ? container_of(
 844                 qp, struct qp_broker_entry, qp) : NULL;
 845         return entry;
 846 }
 847
 848 /*
 849  * Dispatches a queue pair event message directly into the local event
 850  * queue.
 851  */
 852 static int qp_notify_peer_local(bool attach, struct vmci_handle handle)
 853 {
 854         u32 context_id = vmci_get_context_id();
 855         struct vmci_event_qp ev;
 856
 857         ev.msg.hdr.dst = vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
 858         ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
 859                                           VMCI_CONTEXT_RESOURCE_ID);
 860         ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
 861         ev.msg.event_data.event =
 862             attach ? VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH;
 863         ev.payload.peer_id = context_id;
 864         ev.payload.handle = handle;
 865
 866         return vmci_event_dispatch(&ev.msg.hdr);
 867 }
 868
 869 /*
 870  * Allocates and initializes a qp_guest_endpoint structure.
 871  * Allocates a queue_pair rid (and handle) iff the given entry has
 872  * an invalid handle.  0 through VMCI_RESERVED_RESOURCE_ID_MAX
 873  * are reserved handles.  Assumes that the QP list mutex is held
 874  * by the caller.
 875  */
 876 static struct qp_guest_endpoint *
 877 qp_guest_endpoint_create(struct vmci_handle handle,
 878                          u32 peer,
 879                          u32 flags,
 880                          u64 produce_size,
 881                          u64 consume_size,
 882                          void *produce_q,
 883                          void *consume_q)
 884 {
 885         int result;
 886         struct qp_guest_endpoint *entry;
 887         /* One page each for the queue headers. */
 888         const u64 num_ppns = DIV_ROUND_UP(produce_size, PAGE_SIZE) +
 889             DIV_ROUND_UP(consume_size, PAGE_SIZE) + 2;
 890
 891         if (vmci_handle_is_invalid(handle)) {
 892                 u32 context_id = vmci_get_context_id();
 893
 894                 handle = vmci_make_handle(context_id, VMCI_INVALID_ID);
 895         }
 896
 897         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 898         if (entry) {
 899                 entry->qp.peer = peer;
 900                 entry->qp.flags = flags;
 901                 entry->qp.produce_size = produce_size;
 902                 entry->qp.consume_size = consume_size;
 903                 entry->qp.ref_count = 0;
 904                 entry->num_ppns = num_ppns;
 905                 entry->produce_q = produce_q;
 906                 entry->consume_q = consume_q;
 907                 INIT_LIST_HEAD(&entry->qp.list_item);
 908
 909                 /* Add resource obj */
 910                 result = vmci_resource_add(&entry->resource,
 911                                            VMCI_RESOURCE_TYPE_QPAIR_GUEST,
 912                                            handle);
 913                 entry->qp.handle = vmci_resource_handle(&entry->resource);
 914                 if ((result != VMCI_SUCCESS) ||
 915                     qp_list_find(&qp_guest_endpoints, entry->qp.handle)) {
 916                         pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d",
 917                                 handle.context, handle.resource, result);
 918                         kfree(entry);
 919                         entry = NULL;
 920                 }
 921         }
 922         return entry;
 923 }
 924
 925 /*
 926  * Frees a qp_guest_endpoint structure.
 927  */
 928 static void qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry)
 929 {
 930         qp_free_ppn_set(&entry->ppn_set);
 931         qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q);
 932         qp_free_queue(entry->produce_q, entry->qp.produce_size);
 933         qp_free_queue(entry->consume_q, entry->qp.consume_size);
 934         /* Unlink from resource hash table and free callback */
 935         vmci_resource_remove(&entry->resource);
 936
 937         kfree(entry);
 938 }
 939
 940 /*
 941  * Helper to make a queue_pairAlloc hypercall when the driver is
 942  * supporting a guest device.
 943  */
 944 static int qp_alloc_hypercall(const struct qp_guest_endpoint *entry)
 945 {
 946         struct vmci_qp_alloc_msg *alloc_msg;
 947         size_t msg_size;
 948         size_t ppn_size;
 949         int result;
 950
 951         if (!entry || entry->num_ppns <= 2)
 952                 return VMCI_ERROR_INVALID_ARGS;
 953
 954         ppn_size = vmci_use_ppn64() ? sizeof(u64) : sizeof(u32);
 955         msg_size = sizeof(*alloc_msg) +
 956             (size_t) entry->num_ppns * ppn_size;
 957         alloc_msg = kmalloc(msg_size, GFP_KERNEL);
 958         if (!alloc_msg)
 959                 return VMCI_ERROR_NO_MEM;
 960
 961         alloc_msg->hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
 962                                               VMCI_QUEUEPAIR_ALLOC);
 963         alloc_msg->hdr.src = VMCI_ANON_SRC_HANDLE;
 964         alloc_msg->hdr.payload_size = msg_size - VMCI_DG_HEADERSIZE;
 965         alloc_msg->handle = entry->qp.handle;
 966         alloc_msg->peer = entry->qp.peer;
 967         alloc_msg->flags = entry->qp.flags;
 968         alloc_msg->produce_size = entry->qp.produce_size;
 969         alloc_msg->consume_size = entry->qp.consume_size;
 970         alloc_msg->num_ppns = entry->num_ppns;
 971
 972         result = qp_populate_ppn_set((u8 *)alloc_msg + sizeof(*alloc_msg),
 973                                      &entry->ppn_set);
 974         if (result == VMCI_SUCCESS)
 975                 result = vmci_send_datagram(&alloc_msg->hdr);
 976
 977         kfree(alloc_msg);
 978
 979         return result;
 980 }
 981
 982 /*
 983  * Helper to make a queue_pairDetach hypercall when the driver is
 984  * supporting a guest device.
 985  */
 986 static int qp_detatch_hypercall(struct vmci_handle handle)
 987 {
 988         struct vmci_qp_detach_msg detach_msg;
 989
 990         detach_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
 991                                               VMCI_QUEUEPAIR_DETACH);
 992         detach_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
 993         detach_msg.hdr.payload_size = sizeof(handle);
 994         detach_msg.handle = handle;
 995
 996         return vmci_send_datagram(&detach_msg.hdr);
 997 }
 998
 999 /*
1000  * Adds the given entry to the list. Assumes that the list is locked.
1001  */
1002 static void qp_list_add_entry(struct qp_list *qp_list, struct qp_entry *entry)
1003 {
1004         if (entry)
1005                 list_add(&entry->list_item, &qp_list->head);
1006 }
1007
1008 /*
1009  * Removes the given entry from the list. Assumes that the list is locked.
1010  */
1011 static void qp_list_remove_entry(struct qp_list *qp_list,
1012                                  struct qp_entry *entry)
1013 {
1014         if (entry)
1015                 list_del(&entry->list_item);
1016 }
1017
1018 /*
1019  * Helper for VMCI queue_pair detach interface. Frees the physical
1020  * pages for the queue pair.
1021  */
1022 static int qp_detatch_guest_work(struct vmci_handle handle)
1023 {
1024         int result;
1025         struct qp_guest_endpoint *entry;
1026         u32 ref_count = ~0;     /* To avoid compiler warning below */
1027
1028         mutex_lock(&qp_guest_endpoints.mutex);
1029
1030         entry = qp_guest_handle_to_entry(handle);
1031         if (!entry) {
1032                 mutex_unlock(&qp_guest_endpoints.mutex);
1033                 return VMCI_ERROR_NOT_FOUND;
1034         }
1035
1036         if (entry->qp.flags & VMCI_QPFLAG_LOCAL) {
1037                 result = VMCI_SUCCESS;
1038
1039                 if (entry->qp.ref_count > 1) {
1040                         result = qp_notify_peer_local(false, handle);
1041                         /*
1042                          * We can fail to notify a local queuepair
1043                          * because we can't allocate.  We still want
1044                          * to release the entry if that happens, so
1045                          * don't bail out yet.
1046                          */
1047                 }
1048         } else {
1049                 result = qp_detatch_hypercall(handle);
1050                 if (result < VMCI_SUCCESS) {
1051                         /*
1052                          * We failed to notify a non-local queuepair.
1053                          * That other queuepair might still be
1054                          * accessing the shared memory, so don't
1055                          * release the entry yet.  It will get cleaned
1056                          * up by VMCIqueue_pair_Exit() if necessary
1057                          * (assuming we are going away, otherwise why
1058                          * did this fail?).
1059                          */
1060
1061                         mutex_unlock(&qp_guest_endpoints.mutex);
1062                         return result;
1063                 }
1064         }
1065
1066         /*
1067          * If we get here then we either failed to notify a local queuepair, or
1068          * we succeeded in all cases.  Release the entry if required.
1069          */
1070
1071         entry->qp.ref_count--;
1072         if (entry->qp.ref_count == 0)
1073                 qp_list_remove_entry(&qp_guest_endpoints, &entry->qp);
1074
1075         /* If we didn't remove the entry, this could change once we unlock. */
1076         if (entry)
1077                 ref_count = entry->qp.ref_count;
1078
1079         mutex_unlock(&qp_guest_endpoints.mutex);
1080
1081         if (ref_count == 0)
1082                 qp_guest_endpoint_destroy(entry);
1083
1084         return result;
1085 }
1086
/*
 * This function handles the actual allocation of a VMCI queue
 * pair guest endpoint. Allocates physical pages for the queue
 * pair. It makes OS dependent calls through generic wrappers.
 *
 * Two cases are handled under qp_guest_endpoints.mutex:
 *  - An endpoint with *handle already exists: only a local queue pair
 *    (VMCI_QPFLAG_LOCAL) can be attached to, and the attacher receives
 *    the creator's queues swapped.
 *  - No endpoint exists: both queues and a new endpoint are allocated,
 *    and for non-local queue pairs the hypervisor is informed through
 *    qp_alloc_hypercall().
 *
 * On success *handle, *produce_q and *consume_q are filled in and
 * VMCI_SUCCESS is returned.  On failure a VMCI error code is returned
 * and the output arguments are not written.
 */
static int qp_alloc_guest_work(struct vmci_handle *handle,
                               struct vmci_queue **produce_q,
                               u64 produce_size,
                               struct vmci_queue **consume_q,
                               u64 consume_size,
                               u32 peer,
                               u32 flags,
                               u32 priv_flags)
{
        /* Data pages of each queue plus one page for its header. */
        const u64 num_produce_pages =
            DIV_ROUND_UP(produce_size, PAGE_SIZE) + 1;
        const u64 num_consume_pages =
            DIV_ROUND_UP(consume_size, PAGE_SIZE) + 1;
        void *my_produce_q = NULL;
        void *my_consume_q = NULL;
        int result;
        struct qp_guest_endpoint *queue_pair_entry = NULL;

        /* Guest endpoints accept no privilege flags at all. */
        if (priv_flags != VMCI_NO_PRIVILEGE_FLAGS)
                return VMCI_ERROR_NO_ACCESS;

        mutex_lock(&qp_guest_endpoints.mutex);

        queue_pair_entry = qp_guest_handle_to_entry(*handle);
        if (queue_pair_entry) {
                if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) {
                        /* Local attach case. */
                        if (queue_pair_entry->qp.ref_count > 1) {
                                pr_devel("Error attempting to attach more than once\n");
                                result = VMCI_ERROR_UNAVAILABLE;
                                goto error_keep_entry;
                        }

                        /*
                         * The attacher's produce size must equal the
                         * creator's consume size and vice versa, and the
                         * flags must match once the attach-only bit is
                         * masked out.
                         */
                        if (queue_pair_entry->qp.produce_size != consume_size ||
                            queue_pair_entry->qp.consume_size !=
                            produce_size ||
                            queue_pair_entry->qp.flags !=
                            (flags & ~VMCI_QPFLAG_ATTACH_ONLY)) {
                                pr_devel("Error mismatched queue pair in local attach\n");
                                result = VMCI_ERROR_QUEUEPAIR_MISMATCH;
                                goto error_keep_entry;
                        }

                        /*
                         * Do a local attach.  We swap the consume and
                         * produce queues for the attacher and deliver
                         * an attach event.
                         */
                        result = qp_notify_peer_local(true, *handle);
                        if (result < VMCI_SUCCESS)
                                goto error_keep_entry;

                        my_produce_q = queue_pair_entry->consume_q;
                        my_consume_q = queue_pair_entry->produce_q;
                        goto out;
                }

                /* An existing non-local endpoint cannot be attached to here. */
                result = VMCI_ERROR_ALREADY_EXISTS;
                goto error_keep_entry;
        }

        /* Create case: no endpoint with this handle exists yet. */
        my_produce_q = qp_alloc_queue(produce_size, flags);
        if (!my_produce_q) {
                pr_warn("Error allocating pages for produce queue\n");
                result = VMCI_ERROR_NO_MEM;
                goto error;
        }

        my_consume_q = qp_alloc_queue(consume_size, flags);
        if (!my_consume_q) {
                pr_warn("Error allocating pages for consume queue\n");
                result = VMCI_ERROR_NO_MEM;
                goto error;
        }

        /*
         * From here on the endpoint owns both queues; the error path
         * frees them through qp_guest_endpoint_destroy().
         */
        queue_pair_entry = qp_guest_endpoint_create(*handle, peer, flags,
                                                    produce_size, consume_size,
                                                    my_produce_q, my_consume_q);
        if (!queue_pair_entry) {
                pr_warn("Error allocating memory in %s\n", __func__);
                result = VMCI_ERROR_NO_MEM;
                goto error;
        }

        result = qp_alloc_ppn_set(my_produce_q, num_produce_pages, my_consume_q,
                                  num_consume_pages,
                                  &queue_pair_entry->ppn_set);
        if (result < VMCI_SUCCESS) {
                pr_warn("qp_alloc_ppn_set failed\n");
                goto error;
        }

        /*
         * It's only necessary to notify the host if this queue pair will be
         * attached to from another context.
         */
        if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) {
                /* Local create case. */
                u32 context_id = vmci_get_context_id();

                /*
                 * Enforce similar checks on local queue pairs as we
                 * do for regular ones.  The handle's context must
                 * match the creator or attacher context id (here they
                 * are both the current context id) and the
                 * attach-only flag cannot exist during create.  We
                 * also ensure specified peer is this context or an
                 * invalid one.
                 */
                if (queue_pair_entry->qp.handle.context != context_id ||
                    (queue_pair_entry->qp.peer != VMCI_INVALID_ID &&
                     queue_pair_entry->qp.peer != context_id)) {
                        result = VMCI_ERROR_NO_ACCESS;
                        goto error;
                }

                if (queue_pair_entry->qp.flags & VMCI_QPFLAG_ATTACH_ONLY) {
                        result = VMCI_ERROR_NOT_FOUND;
                        goto error;
                }
        } else {
                result = qp_alloc_hypercall(queue_pair_entry);
                if (result < VMCI_SUCCESS) {
                        pr_devel("qp_alloc_hypercall result = %d\n", result);
                        goto error;
                }
        }

        qp_init_queue_mutex((struct vmci_queue *)my_produce_q,
                            (struct vmci_queue *)my_consume_q);

        /* Only now does the endpoint become visible to lookups. */
        qp_list_add_entry(&qp_guest_endpoints, &queue_pair_entry->qp);

 out:
        /* Shared tail of the create and the local attach paths. */
        queue_pair_entry->qp.ref_count++;
        *handle = queue_pair_entry->qp.handle;
        *produce_q = (struct vmci_queue *)my_produce_q;
        *consume_q = (struct vmci_queue *)my_consume_q;

        /*
         * We should initialize the queue pair header pages on a local
         * queue pair create.  For non-local queue pairs, the
         * hypervisor initializes the header pages in the create step.
         */
        if ((queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) &&
            queue_pair_entry->qp.ref_count == 1) {
                vmci_q_header_init((*produce_q)->q_header, *handle);
                vmci_q_header_init((*consume_q)->q_header, *handle);
        }

        mutex_unlock(&qp_guest_endpoints.mutex);

        return VMCI_SUCCESS;

 error:
        /*
         * Failure while creating: the endpoint was never added to the
         * list, so it can be torn down after the mutex is released.
         */
        mutex_unlock(&qp_guest_endpoints.mutex);
        if (queue_pair_entry) {
                /* The queues will be freed inside the destroy routine. */
                qp_guest_endpoint_destroy(queue_pair_entry);
        } else {
                qp_free_queue(my_produce_q, produce_size);
                qp_free_queue(my_consume_q, consume_size);
        }
        return result;

 error_keep_entry:
        /* This path should only be used when an existing entry was found. */
        mutex_unlock(&qp_guest_endpoints.mutex);
        return result;
}
1262
1263 /*
1264  * The first endpoint issuing a queue pair allocation will create the state
1265  * of the queue pair in the queue pair broker.
1266  *
1267  * If the creator is a guest, it will associate a VMX virtual address range
1268  * with the queue pair as specified by the page_store. For compatibility with
1269  * older VMX'en, that would use a separate step to set the VMX virtual
1270  * address range, the virtual address range can be registered later using
1271  * vmci_qp_broker_set_page_store. In that case, a page_store of NULL should be
1272  * used.
1273  *
1274  * If the creator is the host, a page_store of NULL should be used as well,
1275  * since the host is not able to supply a page store for the queue pair.
1276  *
1277  * For older VMX and host callers, the queue pair will be created in the
1278  * VMCIQPB_CREATED_NO_MEM state, and for current VMX callers, it will be
1279  * created in VMCOQPB_CREATED_MEM state.
1280  */
1281 static int qp_broker_create(struct vmci_handle handle,
1282                             u32 peer,
1283                             u32 flags,
1284                             u32 priv_flags,
1285                             u64 produce_size,
1286                             u64 consume_size,
1287                             struct vmci_qp_page_store *page_store,
1288                             struct vmci_ctx *context,
1289                             vmci_event_release_cb wakeup_cb,
1290                             void *client_data, struct qp_broker_entry **ent)
1291 {
1292         struct qp_broker_entry *entry = NULL;
1293         const u32 context_id = vmci_ctx_get_id(context);
1294         bool is_local = flags & VMCI_QPFLAG_LOCAL;
1295         int result;
1296         u64 guest_produce_size;
1297         u64 guest_consume_size;
1298
1299         /* Do not create if the caller asked not to. */
1300         if (flags & VMCI_QPFLAG_ATTACH_ONLY)
1301                 return VMCI_ERROR_NOT_FOUND;
1302
1303         /*
1304          * Creator's context ID should match handle's context ID or the creator
1305          * must allow the context in handle's context ID as the "peer".
1306          */
1307         if (handle.context != context_id && handle.context != peer)
1308                 return VMCI_ERROR_NO_ACCESS;
1309
1310         if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(peer))
1311                 return VMCI_ERROR_DST_UNREACHABLE;
1312
1313         /*
1314          * Creator's context ID for local queue pairs should match the
1315          * peer, if a peer is specified.
1316          */
1317         if (is_local && peer != VMCI_INVALID_ID && context_id != peer)
1318                 return VMCI_ERROR_NO_ACCESS;
1319
1320         entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
1321         if (!entry)
1322                 return VMCI_ERROR_NO_MEM;
1323
1324         if (vmci_ctx_get_id(context) == VMCI_HOST_CONTEXT_ID && !is_local) {
1325                 /*
1326                  * The queue pair broker entry stores values from the guest
1327                  * point of view, so a creating host side endpoint should swap
1328                  * produce and consume values -- unless it is a local queue
1329                  * pair, in which case no swapping is necessary, since the local
1330                  * attacher will swap queues.
1331                  */
1332
1333                 guest_produce_size = consume_size;
1334                 guest_consume_size = produce_size;
1335         } else {
1336                 guest_produce_size = produce_size;
1337                 guest_consume_size = consume_size;
1338         }
1339
1340         entry->qp.handle = handle;
1341         entry->qp.peer = peer;
1342         entry->qp.flags = flags;
1343         entry->qp.produce_size = guest_produce_size;
1344         entry->qp.consume_size = guest_consume_size;
1345         entry->qp.ref_count = 1;
1346         entry->create_id = context_id;
1347         entry->attach_id = VMCI_INVALID_ID;
1348         entry->state = VMCIQPB_NEW;
1349         entry->require_trusted_attach =
1350             !!(context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED);
1351         entry->created_by_trusted =
1352             !!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED);
1353         entry->vmci_page_files = false;
1354         entry->wakeup_cb = wakeup_cb;
1355         entry->client_data = client_data;
1356         entry->produce_q = qp_host_alloc_queue(guest_produce_size);
1357         if (entry->produce_q == NULL) {
1358                 result = VMCI_ERROR_NO_MEM;
1359                 goto error;
1360         }
1361         entry->consume_q = qp_host_alloc_queue(guest_consume_size);
1362         if (entry->consume_q == NULL) {
1363                 result = VMCI_ERROR_NO_MEM;
1364                 goto error;
1365         }
1366
1367         qp_init_queue_mutex(entry->produce_q, entry->consume_q);
1368
1369         INIT_LIST_HEAD(&entry->qp.list_item);
1370
1371         if (is_local) {
1372                 u8 *tmp;
1373
1374                 entry->local_mem = kcalloc(QPE_NUM_PAGES(entry->qp),
1375                                            PAGE_SIZE, GFP_KERNEL);
1376                 if (entry->local_mem == NULL) {
1377                         result = VMCI_ERROR_NO_MEM;
1378                         goto error;
1379                 }
1380                 entry->state = VMCIQPB_CREATED_MEM;
1381                 entry->produce_q->q_header = entry->local_mem;
1382                 tmp = (u8 *)entry->local_mem + PAGE_SIZE *
1383                     (DIV_ROUND_UP(entry->qp.produce_size, PAGE_SIZE) + 1);
1384                 entry->consume_q->q_header = (struct vmci_queue_header *)tmp;
1385         } else if (page_store) {
1386                 /*
1387                  * The VMX already initialized the queue pair headers, so no
1388                  * need for the kernel side to do that.
1389                  */
1390                 result = qp_host_register_user_memory(page_store,
1391                                                       entry->produce_q,
1392                                                       entry->consume_q);
1393                 if (result < VMCI_SUCCESS)
1394                         goto error;
1395
1396                 entry->state = VMCIQPB_CREATED_MEM;
1397         } else {
1398                 /*
1399                  * A create without a page_store may be either a host
1400                  * side create (in which case we are waiting for the
1401                  * guest side to supply the memory) or an old style
1402                  * queue pair create (in which case we will expect a
1403                  * set page store call as the next step).
1404                  */
1405                 entry->state = VMCIQPB_CREATED_NO_MEM;
1406         }
1407
1408         qp_list_add_entry(&qp_broker_list, &entry->qp);
1409         if (ent != NULL)
1410                 *ent = entry;
1411
1412         /* Add to resource obj */
1413         result = vmci_resource_add(&entry->resource,
1414                                    VMCI_RESOURCE_TYPE_QPAIR_HOST,
1415                                    handle);
1416         if (result != VMCI_SUCCESS) {
1417                 pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d",
1418                         handle.context, handle.resource, result);
1419                 goto error;
1420         }
1421
1422         entry->qp.handle = vmci_resource_handle(&entry->resource);
1423         if (is_local) {
1424                 vmci_q_header_init(entry->produce_q->q_header,
1425                                    entry->qp.handle);
1426                 vmci_q_header_init(entry->consume_q->q_header,
1427                                    entry->qp.handle);
1428         }
1429
1430         vmci_ctx_qp_create(context, entry->qp.handle);
1431
1432         return VMCI_SUCCESS;
1433
1434  error:
1435         if (entry != NULL) {
1436                 qp_host_free_queue(entry->produce_q, guest_produce_size);
1437                 qp_host_free_queue(entry->consume_q, guest_consume_size);
1438                 kfree(entry);
1439         }
1440
1441         return result;
1442 }
1443
1444 /*
1445  * Enqueues an event datagram to notify the peer VM attached to
1446  * the given queue pair handle about attach/detach event by the
1447  * given VM.  Returns Payload size of datagram enqueued on
1448  * success, error code otherwise.
1449  */
1450 static int qp_notify_peer(bool attach,
1451                           struct vmci_handle handle,
1452                           u32 my_id,
1453                           u32 peer_id)
1454 {
1455         int rv;
1456         struct vmci_event_qp ev;
1457
1458         if (vmci_handle_is_invalid(handle) || my_id == VMCI_INVALID_ID ||
1459             peer_id == VMCI_INVALID_ID)
1460                 return VMCI_ERROR_INVALID_ARGS;
1461
1462         /*
1463          * In vmci_ctx_enqueue_datagram() we enforce the upper limit on
1464          * number of pending events from the hypervisor to a given VM
1465          * otherwise a rogue VM could do an arbitrary number of attach
1466          * and detach operations causing memory pressure in the host
1467          * kernel.
1468          */
1469
1470         ev.msg.hdr.dst = vmci_make_handle(peer_id, VMCI_EVENT_HANDLER);
1471         ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
1472                                           VMCI_CONTEXT_RESOURCE_ID);
1473         ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
1474         ev.msg.event_data.event = attach ?
1475             VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH;
1476         ev.payload.handle = handle;
1477         ev.payload.peer_id = my_id;
1478
1479         rv = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
1480                                     &ev.msg.hdr, false);
1481         if (rv < VMCI_SUCCESS)
1482                 pr_warn("Failed to enqueue queue_pair %s event datagram for context (ID=0x%x)\n",
1483                         attach ? "ATTACH" : "DETACH", peer_id);
1484
1485         return rv;
1486 }
1487
1488 /*
1489  * The second endpoint issuing a queue pair allocation will attach to
1490  * the queue pair registered with the queue pair broker.
1491  *
 * If the attacher is a guest, it will associate a VMX virtual address
 * range with the queue pair as specified by the page_store. At this
 * point, the already attached host endpoint may start using the queue
 * pair, and an attach event is sent to it. For compatibility with
 * older VMX'en, which used a separate step to set the VMX virtual
 * address range, the virtual address range can be registered later
 * using vmci_qp_broker_set_page_store. In that case, a page_store of
 * NULL should be used, and the attach event will be generated once
 * the actual page store has been set.
1501  *
1502  * If the attacher is the host, a page_store of NULL should be used as
1503  * well, since the page store information is already set by the guest.
1504  *
1505  * For new VMX and host callers, the queue pair will be moved to the
1506  * VMCIQPB_ATTACHED_MEM state, and for older VMX callers, it will be
1507  * moved to the VMCOQPB_ATTACHED_NO_MEM state.
1508  */
1509 static int qp_broker_attach(struct qp_broker_entry *entry,
1510                             u32 peer,
1511                             u32 flags,
1512                             u32 priv_flags,
1513                             u64 produce_size,
1514                             u64 consume_size,
1515                             struct vmci_qp_page_store *page_store,
1516                             struct vmci_ctx *context,
1517                             vmci_event_release_cb wakeup_cb,
1518                             void *client_data,
1519                             struct qp_broker_entry **ent)
1520 {
1521         const u32 context_id = vmci_ctx_get_id(context);
1522         bool is_local = flags & VMCI_QPFLAG_LOCAL;
1523         int result;
1524
1525         if (entry->state != VMCIQPB_CREATED_NO_MEM &&
1526             entry->state != VMCIQPB_CREATED_MEM)
1527                 return VMCI_ERROR_UNAVAILABLE;
1528
1529         if (is_local) {
1530                 if (!(entry->qp.flags & VMCI_QPFLAG_LOCAL) ||
1531                     context_id != entry->create_id) {
1532                         return VMCI_ERROR_INVALID_ARGS;
1533                 }
1534         } else if (context_id == entry->create_id ||
1535                    context_id == entry->attach_id) {
1536                 return VMCI_ERROR_ALREADY_EXISTS;
1537         }
1538
1539         if (VMCI_CONTEXT_IS_VM(context_id) &&
1540             VMCI_CONTEXT_IS_VM(entry->create_id))
1541                 return VMCI_ERROR_DST_UNREACHABLE;
1542
1543         /*
1544          * If we are attaching from a restricted context then the queuepair
1545          * must have been created by a trusted endpoint.
1546          */
1547         if ((context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) &&
1548             !entry->created_by_trusted)
1549                 return VMCI_ERROR_NO_ACCESS;
1550
1551         /*
1552          * If we are attaching to a queuepair that was created by a restricted
1553          * context then we must be trusted.
1554          */
1555         if (entry->require_trusted_attach &&
1556             (!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED)))
1557                 return VMCI_ERROR_NO_ACCESS;
1558
1559         /*
1560          * If the creator specifies VMCI_INVALID_ID in "peer" field, access
1561          * control check is not performed.
1562          */
1563         if (entry->qp.peer != VMCI_INVALID_ID && entry->qp.peer != context_id)
1564                 return VMCI_ERROR_NO_ACCESS;
1565
1566         if (entry->create_id == VMCI_HOST_CONTEXT_ID) {
1567                 /*
1568                  * Do not attach if the caller doesn't support Host Queue Pairs
1569                  * and a host created this queue pair.
1570                  */
1571
1572                 if (!vmci_ctx_supports_host_qp(context))
1573                         return VMCI_ERROR_INVALID_RESOURCE;
1574
1575         } else if (context_id == VMCI_HOST_CONTEXT_ID) {
1576                 struct vmci_ctx *create_context;
1577                 bool supports_host_qp;
1578
1579                 /*
1580                  * Do not attach a host to a user created queue pair if that
1581                  * user doesn't support host queue pair end points.
1582                  */
1583
1584                 create_context = vmci_ctx_get(entry->create_id);
1585                 supports_host_qp = vmci_ctx_supports_host_qp(create_context);
1586                 vmci_ctx_put(create_context);
1587
1588                 if (!supports_host_qp)
1589                         return VMCI_ERROR_INVALID_RESOURCE;
1590         }
1591
1592         if ((entry->qp.flags & ~VMCI_QP_ASYMM) != (flags & ~VMCI_QP_ASYMM_PEER))
1593                 return VMCI_ERROR_QUEUEPAIR_MISMATCH;
1594
1595         if (context_id != VMCI_HOST_CONTEXT_ID) {
1596                 /*
1597                  * The queue pair broker entry stores values from the guest
1598                  * point of view, so an attaching guest should match the values
1599                  * stored in the entry.
1600                  */
1601
1602                 if (entry->qp.produce_size != produce_size ||
1603                     entry->qp.consume_size != consume_size) {
1604                         return VMCI_ERROR_QUEUEPAIR_MISMATCH;
1605                 }
1606         } else if (entry->qp.produce_size != consume_size ||
1607                    entry->qp.consume_size != produce_size) {
1608                 return VMCI_ERROR_QUEUEPAIR_MISMATCH;
1609         }
1610
1611         if (context_id != VMCI_HOST_CONTEXT_ID) {
1612                 /*
1613                  * If a guest attached to a queue pair, it will supply
1614                  * the backing memory.  If this is a pre NOVMVM vmx,
1615                  * the backing memory will be supplied by calling
1616                  * vmci_qp_broker_set_page_store() following the
1617                  * return of the vmci_qp_broker_alloc() call. If it is
1618                  * a vmx of version NOVMVM or later, the page store
1619                  * must be supplied as part of the
1620                  * vmci_qp_broker_alloc call.  Under all circumstances
1621                  * must the initially created queue pair not have any
1622                  * memory associated with it already.
1623                  */
1624
1625                 if (entry->state != VMCIQPB_CREATED_NO_MEM)
1626                         return VMCI_ERROR_INVALID_ARGS;
1627
1628                 if (page_store != NULL) {
1629                         /*
1630                          * Patch up host state to point to guest
1631                          * supplied memory. The VMX already
1632                          * initialized the queue pair headers, so no
1633                          * need for the kernel side to do that.
1634                          */
1635
1636                         result = qp_host_register_user_memory(page_store,
1637                                                               entry->produce_q,
1638                                                               entry->consume_q);
1639                         if (result < VMCI_SUCCESS)
1640                                 return result;
1641
1642                         entry->state = VMCIQPB_ATTACHED_MEM;
1643                 } else {
1644                         entry->state = VMCIQPB_ATTACHED_NO_MEM;
1645                 }
1646         } else if (entry->state == VMCIQPB_CREATED_NO_MEM) {
1647                 /*
1648                  * The host side is attempting to attach to a queue
1649                  * pair that doesn't have any memory associated with
1650                  * it. This must be a pre NOVMVM vmx that hasn't set
1651                  * the page store information yet, or a quiesced VM.
1652                  */
1653
1654                 return VMCI_ERROR_UNAVAILABLE;
1655         } else {
1656                 /* The host side has successfully attached to a queue pair. */
1657                 entry->state = VMCIQPB_ATTACHED_MEM;
1658         }
1659
1660         if (entry->state == VMCIQPB_ATTACHED_MEM) {
1661                 result =
1662                     qp_notify_peer(true, entry->qp.handle, context_id,
1663                                    entry->create_id);
1664                 if (result < VMCI_SUCCESS)
1665                         pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n",
1666                                 entry->create_id, entry->qp.handle.context,
1667                                 entry->qp.handle.resource);
1668         }
1669
1670         entry->attach_id = context_id;
1671         entry->qp.ref_count++;
1672         if (wakeup_cb) {
1673                 entry->wakeup_cb = wakeup_cb;
1674                 entry->client_data = client_data;
1675         }
1676
1677         /*
1678          * When attaching to local queue pairs, the context already has
1679          * an entry tracking the queue pair, so don't add another one.
1680          */
1681         if (!is_local)
1682                 vmci_ctx_qp_create(context, entry->qp.handle);
1683
1684         if (ent != NULL)
1685                 *ent = entry;
1686
1687         return VMCI_SUCCESS;
1688 }
1689
1690 /*
1691  * queue_pair_Alloc for use when setting up queue pair endpoints
1692  * on the host.
1693  */
1694 static int qp_broker_alloc(struct vmci_handle handle,
1695                            u32 peer,
1696                            u32 flags,
1697                            u32 priv_flags,
1698                            u64 produce_size,
1699                            u64 consume_size,
1700                            struct vmci_qp_page_store *page_store,
1701                            struct vmci_ctx *context,
1702                            vmci_event_release_cb wakeup_cb,
1703                            void *client_data,
1704                            struct qp_broker_entry **ent,
1705                            bool *swap)
1706 {
1707         const u32 context_id = vmci_ctx_get_id(context);
1708         bool create;
1709         struct qp_broker_entry *entry = NULL;
1710         bool is_local = flags & VMCI_QPFLAG_LOCAL;
1711         int result;
1712
1713         if (vmci_handle_is_invalid(handle) ||
1714             (flags & ~VMCI_QP_ALL_FLAGS) || is_local ||
1715             !(produce_size || consume_size) ||
1716             !context || context_id == VMCI_INVALID_ID ||
1717             handle.context == VMCI_INVALID_ID) {
1718                 return VMCI_ERROR_INVALID_ARGS;
1719         }
1720
1721         if (page_store && !VMCI_QP_PAGESTORE_IS_WELLFORMED(page_store))
1722                 return VMCI_ERROR_INVALID_ARGS;
1723
1724         /*
1725          * In the initial argument check, we ensure that non-vmkernel hosts
1726          * are not allowed to create local queue pairs.
1727          */
1728
1729         mutex_lock(&qp_broker_list.mutex);
1730
1731         if (!is_local && vmci_ctx_qp_exists(context, handle)) {
1732                 pr_devel("Context (ID=0x%x) already attached to queue pair (handle=0x%x:0x%x)\n",
1733                          context_id, handle.context, handle.resource);
1734                 mutex_unlock(&qp_broker_list.mutex);
1735                 return VMCI_ERROR_ALREADY_EXISTS;
1736         }
1737
1738         if (handle.resource != VMCI_INVALID_ID)
1739                 entry = qp_broker_handle_to_entry(handle);
1740
1741         if (!entry) {
1742                 create = true;
1743                 result =
1744                     qp_broker_create(handle, peer, flags, priv_flags,
1745                                      produce_size, consume_size, page_store,
1746                                      context, wakeup_cb, client_data, ent);
1747         } else {
1748                 create = false;
1749                 result =
1750                     qp_broker_attach(entry, peer, flags, priv_flags,
1751                                      produce_size, consume_size, page_store,
1752                                      context, wakeup_cb, client_data, ent);
1753         }
1754
1755         mutex_unlock(&qp_broker_list.mutex);
1756
1757         if (swap)
1758                 *swap = (context_id == VMCI_HOST_CONTEXT_ID) &&
1759                     !(create && is_local);
1760
1761         return result;
1762 }
1763
1764 /*
1765  * This function implements the kernel API for allocating a queue
1766  * pair.
1767  */
1768 static int qp_alloc_host_work(struct vmci_handle *handle,
1769                               struct vmci_queue **produce_q,
1770                               u64 produce_size,
1771                               struct vmci_queue **consume_q,
1772                               u64 consume_size,
1773                               u32 peer,
1774                               u32 flags,
1775                               u32 priv_flags,
1776                               vmci_event_release_cb wakeup_cb,
1777                               void *client_data)
1778 {
1779         struct vmci_handle new_handle;
1780         struct vmci_ctx *context;
1781         struct qp_broker_entry *entry;
1782         int result;
1783         bool swap;
1784
1785         if (vmci_handle_is_invalid(*handle)) {
1786                 new_handle = vmci_make_handle(
1787                         VMCI_HOST_CONTEXT_ID, VMCI_INVALID_ID);
1788         } else
1789                 new_handle = *handle;
1790
1791         context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID);
1792         entry = NULL;
1793         result =
1794             qp_broker_alloc(new_handle, peer, flags, priv_flags,
1795                             produce_size, consume_size, NULL, context,
1796                             wakeup_cb, client_data, &entry, &swap);
1797         if (result == VMCI_SUCCESS) {
1798                 if (swap) {
1799                         /*
1800                          * If this is a local queue pair, the attacher
1801                          * will swap around produce and consume
1802                          * queues.
1803                          */
1804
1805                         *produce_q = entry->consume_q;
1806                         *consume_q = entry->produce_q;
1807                 } else {
1808                         *produce_q = entry->produce_q;
1809                         *consume_q = entry->consume_q;
1810                 }
1811
1812                 *handle = vmci_resource_handle(&entry->resource);
1813         } else {
1814                 *handle = VMCI_INVALID_HANDLE;
1815                 pr_devel("queue pair broker failed to alloc (result=%d)\n",
1816                          result);
1817         }
1818         vmci_ctx_put(context);
1819         return result;
1820 }
1821
1822 /*
1823  * Allocates a VMCI queue_pair. Only checks validity of input
1824  * arguments. The real work is done in the host or guest
1825  * specific function.
1826  */
1827 int vmci_qp_alloc(struct vmci_handle *handle,
1828                   struct vmci_queue **produce_q,
1829                   u64 produce_size,
1830                   struct vmci_queue **consume_q,
1831                   u64 consume_size,
1832                   u32 peer,
1833                   u32 flags,
1834                   u32 priv_flags,
1835                   bool guest_endpoint,
1836                   vmci_event_release_cb wakeup_cb,
1837                   void *client_data)
1838 {
1839         if (!handle || !produce_q || !consume_q ||
1840             (!produce_size && !consume_size) || (flags & ~VMCI_QP_ALL_FLAGS))
1841                 return VMCI_ERROR_INVALID_ARGS;
1842
1843         if (guest_endpoint) {
1844                 return qp_alloc_guest_work(handle, produce_q,
1845                                            produce_size, consume_q,
1846                                            consume_size, peer,
1847                                            flags, priv_flags);
1848         } else {
1849                 return qp_alloc_host_work(handle, produce_q,
1850                                           produce_size, consume_q,
1851                                           consume_size, peer, flags,
1852                                           priv_flags, wakeup_cb, client_data);
1853         }
1854 }
1855
1856 /*
1857  * This function implements the host kernel API for detaching from
1858  * a queue pair.
1859  */
1860 static int qp_detatch_host_work(struct vmci_handle handle)
1861 {
1862         int result;
1863         struct vmci_ctx *context;
1864
1865         context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID);
1866
1867         result = vmci_qp_broker_detach(handle, context);
1868
1869         vmci_ctx_put(context);
1870         return result;
1871 }
1872
1873 /*
1874  * Detaches from a VMCI queue_pair. Only checks validity of input argument.
1875  * Real work is done in the host or guest specific function.
1876  */
1877 static int qp_detatch(struct vmci_handle handle, bool guest_endpoint)
1878 {
1879         if (vmci_handle_is_invalid(handle))
1880                 return VMCI_ERROR_INVALID_ARGS;
1881
1882         if (guest_endpoint)
1883                 return qp_detatch_guest_work(handle);
1884         else
1885                 return qp_detatch_host_work(handle);
1886 }
1887
1888 /*
1889  * Returns the entry from the head of the list. Assumes that the list is
1890  * locked.
1891  */
1892 static struct qp_entry *qp_list_get_head(struct qp_list *qp_list)
1893 {
1894         if (!list_empty(&qp_list->head)) {
1895                 struct qp_entry *entry =
1896                     list_first_entry(&qp_list->head, struct qp_entry,
1897                                      list_item);
1898                 return entry;
1899         }
1900
1901         return NULL;
1902 }
1903
1904 void vmci_qp_broker_exit(void)
1905 {
1906         struct qp_entry *entry;
1907         struct qp_broker_entry *be;
1908
1909         mutex_lock(&qp_broker_list.mutex);
1910
1911         while ((entry = qp_list_get_head(&qp_broker_list))) {
1912                 be = (struct qp_broker_entry *)entry;
1913
1914                 qp_list_remove_entry(&qp_broker_list, entry);
1915                 kfree(be);
1916         }
1917
1918         mutex_unlock(&qp_broker_list.mutex);
1919 }
1920
1921 /*
1922  * Requests that a queue pair be allocated with the VMCI queue
1923  * pair broker. Allocates a queue pair entry if one does not
1924  * exist. Attaches to one if it exists, and retrieves the page
1925  * files backing that queue_pair.  Assumes that the queue pair
1926  * broker lock is held.
1927  */
1928 int vmci_qp_broker_alloc(struct vmci_handle handle,
1929                          u32 peer,
1930                          u32 flags,
1931                          u32 priv_flags,
1932                          u64 produce_size,
1933                          u64 consume_size,
1934                          struct vmci_qp_page_store *page_store,
1935                          struct vmci_ctx *context)
1936 {
1937         if (!QP_SIZES_ARE_VALID(produce_size, consume_size))
1938                 return VMCI_ERROR_NO_RESOURCES;
1939
1940         return qp_broker_alloc(handle, peer, flags, priv_flags,
1941                                produce_size, consume_size,
1942                                page_store, context, NULL, NULL, NULL, NULL);
1943 }
1944
1945 /*
1946  * VMX'en with versions lower than VMCI_VERSION_NOVMVM use a separate
1947  * step to add the UVAs of the VMX mapping of the queue pair. This function
1948  * provides backwards compatibility with such VMX'en, and takes care of
1949  * registering the page store for a queue pair previously allocated by the
1950  * VMX during create or attach. This function will move the queue pair state
1951  * to either from VMCIQBP_CREATED_NO_MEM to VMCIQBP_CREATED_MEM or
1952  * VMCIQBP_ATTACHED_NO_MEM to VMCIQBP_ATTACHED_MEM. If moving to the
1953  * attached state with memory, the queue pair is ready to be used by the
1954  * host peer, and an attached event will be generated.
1955  *
1956  * Assumes that the queue pair broker lock is held.
1957  *
1958  * This function is only used by the hosted platform, since there is no
1959  * issue with backwards compatibility for vmkernel.
1960  */
1961 int vmci_qp_broker_set_page_store(struct vmci_handle handle,
1962                                   u64 produce_uva,
1963                                   u64 consume_uva,
1964                                   struct vmci_ctx *context)
1965 {
1966         struct qp_broker_entry *entry;
1967         int result;
1968         const u32 context_id = vmci_ctx_get_id(context);
1969
1970         if (vmci_handle_is_invalid(handle) || !context ||
1971             context_id == VMCI_INVALID_ID)
1972                 return VMCI_ERROR_INVALID_ARGS;
1973
1974         /*
1975          * We only support guest to host queue pairs, so the VMX must
1976          * supply UVAs for the mapped page files.
1977          */
1978
1979         if (produce_uva == 0 || consume_uva == 0)
1980                 return VMCI_ERROR_INVALID_ARGS;
1981
1982         mutex_lock(&qp_broker_list.mutex);
1983
1984         if (!vmci_ctx_qp_exists(context, handle)) {
1985                 pr_warn("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
1986                         context_id, handle.context, handle.resource);
1987                 result = VMCI_ERROR_NOT_FOUND;
1988                 goto out;
1989         }
1990
1991         entry = qp_broker_handle_to_entry(handle);
1992         if (!entry) {
1993                 result = VMCI_ERROR_NOT_FOUND;
1994                 goto out;
1995         }
1996
1997         /*
1998          * If I'm the owner then I can set the page store.
1999          *
2000          * Or, if a host created the queue_pair and I'm the attached peer
2001          * then I can set the page store.
2002          */
2003         if (entry->create_id != context_id &&
2004             (entry->create_id != VMCI_HOST_CONTEXT_ID ||
2005              entry->attach_id != context_id)) {
2006                 result = VMCI_ERROR_QUEUEPAIR_NOTOWNER;
2007                 goto out;
2008         }
2009
2010         if (entry->state != VMCIQPB_CREATED_NO_MEM &&
2011             entry->state != VMCIQPB_ATTACHED_NO_MEM) {
2012                 result = VMCI_ERROR_UNAVAILABLE;
2013                 goto out;
2014         }
2015
2016         result = qp_host_get_user_memory(produce_uva, consume_uva,
2017                                          entry->produce_q, entry->consume_q);
2018         if (result < VMCI_SUCCESS)
2019                 goto out;
2020
2021         result = qp_host_map_queues(entry->produce_q, entry->consume_q);
2022         if (result < VMCI_SUCCESS) {
2023                 qp_host_unregister_user_memory(entry->produce_q,
2024                                                entry->consume_q);
2025                 goto out;
2026         }
2027
2028         if (entry->state == VMCIQPB_CREATED_NO_MEM)
2029                 entry->state = VMCIQPB_CREATED_MEM;
2030         else
2031                 entry->state = VMCIQPB_ATTACHED_MEM;
2032
2033         entry->vmci_page_files = true;
2034
2035         if (entry->state == VMCIQPB_ATTACHED_MEM) {
2036                 result =
2037                     qp_notify_peer(true, handle, context_id, entry->create_id);
2038                 if (result < VMCI_SUCCESS) {
2039                         pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n",
2040                                 entry->create_id, entry->qp.handle.context,
2041                                 entry->qp.handle.resource);
2042                 }
2043         }
2044
2045         result = VMCI_SUCCESS;
2046  out:
2047         mutex_unlock(&qp_broker_list.mutex);
2048         return result;
2049 }
2050
2051 /*
2052  * Resets saved queue headers for the given QP broker
2053  * entry. Should be used when guest memory becomes available
2054  * again, or the guest detaches.
2055  */
2056 static void qp_reset_saved_headers(struct qp_broker_entry *entry)
2057 {
2058         entry->produce_q->saved_header = NULL;
2059         entry->consume_q->saved_header = NULL;
2060 }
2061
2062 /*
2063  * The main entry point for detaching from a queue pair registered with the
2064  * queue pair broker. If more than one endpoint is attached to the queue
2065  * pair, the first endpoint will mainly decrement a reference count and
2066  * generate a notification to its peer. The last endpoint will clean up
2067  * the queue pair state registered with the broker.
2068  *
2069  * When a guest endpoint detaches, it will unmap and unregister the guest
2070  * memory backing the queue pair. If the host is still attached, it will
2071  * no longer be able to access the queue pair content.
2072  *
2073  * If the queue pair is already in a state where there is no memory
2074  * registered for the queue pair (any *_NO_MEM state), it will transition to
2075  * the VMCIQPB_SHUTDOWN_NO_MEM state. This will also happen, if a guest
2076  * endpoint is the first of two endpoints to detach. If the host endpoint is
2077  * the first out of two to detach, the queue pair will move to the
2078  * VMCIQPB_SHUTDOWN_MEM state.
2079  */
2080 int vmci_qp_broker_detach(struct vmci_handle handle, struct vmci_ctx *context)
2081 {
2082         struct qp_broker_entry *entry;
2083         const u32 context_id = vmci_ctx_get_id(context);
2084         u32 peer_id;
2085         bool is_local = false;
2086         int result;
2087
2088         if (vmci_handle_is_invalid(handle) || !context ||
2089             context_id == VMCI_INVALID_ID) {
2090                 return VMCI_ERROR_INVALID_ARGS;
2091         }
2092
2093         mutex_lock(&qp_broker_list.mutex);
2094
2095         if (!vmci_ctx_qp_exists(context, handle)) {
2096                 pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
2097                          context_id, handle.context, handle.resource);
2098                 result = VMCI_ERROR_NOT_FOUND;
2099                 goto out;
2100         }
2101
2102         entry = qp_broker_handle_to_entry(handle);
2103         if (!entry) {
2104                 pr_devel("Context (ID=0x%x) reports being attached to queue pair(handle=0x%x:0x%x) that isn't present in broker\n",
2105                          context_id, handle.context, handle.resource);
2106                 result = VMCI_ERROR_NOT_FOUND;
2107                 goto out;
2108         }
2109
2110         if (context_id != entry->create_id && context_id != entry->attach_id) {
2111                 result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
2112                 goto out;
2113         }
2114
2115         if (context_id == entry->create_id) {
2116                 peer_id = entry->attach_id;
2117                 entry->create_id = VMCI_INVALID_ID;
2118         } else {
2119                 peer_id = entry->create_id;
2120                 entry->attach_id = VMCI_INVALID_ID;
2121         }
2122         entry->qp.ref_count--;
2123
2124         is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL;
2125
2126         if (context_id != VMCI_HOST_CONTEXT_ID) {
2127                 bool headers_mapped;
2128
2129                 /*
2130                  * Pre NOVMVM vmx'en may detach from a queue pair
2131                  * before setting the page store, and in that case
2132                  * there is no user memory to detach from. Also, more
2133                  * recent VMX'en may detach from a queue pair in the
2134                  * quiesced state.
2135                  */
2136
2137                 qp_acquire_queue_mutex(entry->produce_q);
2138                 headers_mapped = entry->produce_q->q_header ||
2139                     entry->consume_q->q_header;
2140                 if (QPBROKERSTATE_HAS_MEM(entry)) {
2141                         result =
2142                             qp_host_unmap_queues(INVALID_VMCI_GUEST_MEM_ID,
2143                                                  entry->produce_q,
2144                                                  entry->consume_q);
2145                         if (result < VMCI_SUCCESS)
2146                                 pr_warn("Failed to unmap queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n",
2147                                         handle.context, handle.resource,
2148                                         result);
2149
2150                         qp_host_unregister_user_memory(entry->produce_q,
2151                                                        entry->consume_q);
2152
2153                 }
2154
2155                 if (!headers_mapped)
2156                         qp_reset_saved_headers(entry);
2157
2158                 qp_release_queue_mutex(entry->produce_q);
2159
2160                 if (!headers_mapped && entry->wakeup_cb)
2161                         entry->wakeup_cb(entry->client_data);
2162
2163         } else {
2164                 if (entry->wakeup_cb) {
2165                         entry->wakeup_cb = NULL;
2166                         entry->client_data = NULL;
2167                 }
2168         }
2169
2170         if (entry->qp.ref_count == 0) {
2171                 qp_list_remove_entry(&qp_broker_list, &entry->qp);
2172
2173                 if (is_local)
2174                         kfree(entry->local_mem);
2175
2176                 qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q);
2177                 qp_host_free_queue(entry->produce_q, entry->qp.produce_size);
2178                 qp_host_free_queue(entry->consume_q, entry->qp.consume_size);
2179                 /* Unlink from resource hash table and free callback */
2180                 vmci_resource_remove(&entry->resource);
2181
2182                 kfree(entry);
2183
2184                 vmci_ctx_qp_destroy(context, handle);
2185         } else {
2186                 qp_notify_peer(false, handle, context_id, peer_id);
2187                 if (context_id == VMCI_HOST_CONTEXT_ID &&
2188                     QPBROKERSTATE_HAS_MEM(entry)) {
2189                         entry->state = VMCIQPB_SHUTDOWN_MEM;
2190                 } else {
2191                         entry->state = VMCIQPB_SHUTDOWN_NO_MEM;
2192                 }
2193
2194                 if (!is_local)
2195                         vmci_ctx_qp_destroy(context, handle);
2196
2197         }
2198         result = VMCI_SUCCESS;
2199  out:
2200         mutex_unlock(&qp_broker_list.mutex);
2201         return result;
2202 }
2203
2204 /*
2205  * Establishes the necessary mappings for a queue pair given a
2206  * reference to the queue pair guest memory. This is usually
2207  * called when a guest is unquiesced and the VMX is allowed to
2208  * map guest memory once again.
2209  */
2210 int vmci_qp_broker_map(struct vmci_handle handle,
2211                        struct vmci_ctx *context,
2212                        u64 guest_mem)
2213 {
2214         struct qp_broker_entry *entry;
2215         const u32 context_id = vmci_ctx_get_id(context);
2216         int result;
2217
2218         if (vmci_handle_is_invalid(handle) || !context ||
2219             context_id == VMCI_INVALID_ID)
2220                 return VMCI_ERROR_INVALID_ARGS;
2221
2222         mutex_lock(&qp_broker_list.mutex);
2223
2224         if (!vmci_ctx_qp_exists(context, handle)) {
2225                 pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
2226                          context_id, handle.context, handle.resource);
2227                 result = VMCI_ERROR_NOT_FOUND;
2228                 goto out;
2229         }
2230
2231         entry = qp_broker_handle_to_entry(handle);
2232         if (!entry) {
2233                 pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
2234                          context_id, handle.context, handle.resource);
2235                 result = VMCI_ERROR_NOT_FOUND;
2236                 goto out;
2237         }
2238
2239         if (context_id != entry->create_id && context_id != entry->attach_id) {
2240                 result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
2241                 goto out;
2242         }
2243
2244         result = VMCI_SUCCESS;
2245
2246         if (context_id != VMCI_HOST_CONTEXT_ID &&
2247             !QPBROKERSTATE_HAS_MEM(entry)) {
2248                 struct vmci_qp_page_store page_store;
2249
2250                 page_store.pages = guest_mem;
2251                 page_store.len = QPE_NUM_PAGES(entry->qp);
2252
2253                 qp_acquire_queue_mutex(entry->produce_q);
2254                 qp_reset_saved_headers(entry);
2255                 result =
2256                     qp_host_register_user_memory(&page_store,
2257                                                  entry->produce_q,
2258                                                  entry->consume_q);
2259                 qp_release_queue_mutex(entry->produce_q);
2260                 if (result == VMCI_SUCCESS) {
2261                         /* Move state from *_NO_MEM to *_MEM */
2262
2263                         entry->state++;
2264
2265                         if (entry->wakeup_cb)
2266                                 entry->wakeup_cb(entry->client_data);
2267                 }
2268         }
2269
2270  out:
2271         mutex_unlock(&qp_broker_list.mutex);
2272         return result;
2273 }
2274
2275 /*
2276  * Saves a snapshot of the queue headers for the given QP broker
2277  * entry. Should be used when guest memory is unmapped.
2278  * Results:
2279  * VMCI_SUCCESS on success, appropriate error code if guest memory
2280  * can't be accessed..
2281  */
2282 static int qp_save_headers(struct qp_broker_entry *entry)
2283 {
2284         int result;
2285
2286         if (entry->produce_q->saved_header != NULL &&
2287             entry->consume_q->saved_header != NULL) {
2288                 /*
2289                  *  If the headers have already been saved, we don't need to do
2290                  *  it again, and we don't want to map in the headers
2291                  *  unnecessarily.
2292                  */
2293
2294                 return VMCI_SUCCESS;
2295         }
2296
2297         if (NULL == entry->produce_q->q_header ||
2298             NULL == entry->consume_q->q_header) {
2299                 result = qp_host_map_queues(entry->produce_q, entry->consume_q);
2300                 if (result < VMCI_SUCCESS)
2301                         return result;
2302         }
2303
2304         memcpy(&entry->saved_produce_q, entry->produce_q->q_header,
2305                sizeof(entry->saved_produce_q));
2306         entry->produce_q->saved_header = &entry->saved_produce_q;
2307         memcpy(&entry->saved_consume_q, entry->consume_q->q_header,
2308                sizeof(entry->saved_consume_q));
2309         entry->consume_q->saved_header = &entry->saved_consume_q;
2310
2311         return VMCI_SUCCESS;
2312 }
2313
2314 /*
2315  * Removes all references to the guest memory of a given queue pair, and
2316  * will move the queue pair from state *_MEM to *_NO_MEM. It is usually
2317  * called when a VM is being quiesced where access to guest memory should
2318  * avoided.
2319  */
2320 int vmci_qp_broker_unmap(struct vmci_handle handle,
2321                          struct vmci_ctx *context,
2322                          u32 gid)
2323 {
2324         struct qp_broker_entry *entry;
2325         const u32 context_id = vmci_ctx_get_id(context);
2326         int result;
2327
2328         if (vmci_handle_is_invalid(handle) || !context ||
2329             context_id == VMCI_INVALID_ID)
2330                 return VMCI_ERROR_INVALID_ARGS;
2331
2332         mutex_lock(&qp_broker_list.mutex);
2333
2334         if (!vmci_ctx_qp_exists(context, handle)) {
2335                 pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
2336                          context_id, handle.context, handle.resource);
2337                 result = VMCI_ERROR_NOT_FOUND;
2338                 goto out;
2339         }
2340
2341         entry = qp_broker_handle_to_entry(handle);
2342         if (!entry) {
2343                 pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
2344                          context_id, handle.context, handle.resource);
2345                 result = VMCI_ERROR_NOT_FOUND;
2346                 goto out;
2347         }
2348
2349         if (context_id != entry->create_id && context_id != entry->attach_id) {
2350                 result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
2351                 goto out;
2352         }
2353
2354         if (context_id != VMCI_HOST_CONTEXT_ID &&
2355             QPBROKERSTATE_HAS_MEM(entry)) {
2356                 qp_acquire_queue_mutex(entry->produce_q);
2357                 result = qp_save_headers(entry);
2358                 if (result < VMCI_SUCCESS)
2359                         pr_warn("Failed to save queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n",
2360                                 handle.context, handle.resource, result);
2361
2362                 qp_host_unmap_queues(gid, entry->produce_q, entry->consume_q);
2363
2364                 /*
2365                  * On hosted, when we unmap queue pairs, the VMX will also
2366                  * unmap the guest memory, so we invalidate the previously
2367                  * registered memory. If the queue pair is mapped again at a
2368                  * later point in time, we will need to reregister the user
2369                  * memory with a possibly new user VA.
2370                  */
2371                 qp_host_unregister_user_memory(entry->produce_q,
2372                                                entry->consume_q);
2373
2374                 /*
2375                  * Move state from *_MEM to *_NO_MEM.
2376                  */
2377                 entry->state--;
2378
2379                 qp_release_queue_mutex(entry->produce_q);
2380         }
2381
2382         result = VMCI_SUCCESS;
2383
2384  out:
2385         mutex_unlock(&qp_broker_list.mutex);
2386         return result;
2387 }
2388
2389 /*
2390  * Destroys all guest queue pair endpoints. If active guest queue
2391  * pairs still exist, hypercalls to attempt detach from these
2392  * queue pairs will be made. Any failure to detach is silently
2393  * ignored.
2394  */
2395 void vmci_qp_guest_endpoints_exit(void)
2396 {
2397         struct qp_entry *entry;
2398         struct qp_guest_endpoint *ep;
2399
2400         mutex_lock(&qp_guest_endpoints.mutex);
2401
2402         while ((entry = qp_list_get_head(&qp_guest_endpoints))) {
2403                 ep = (struct qp_guest_endpoint *)entry;
2404
2405                 /* Don't make a hypercall for local queue_pairs. */
2406                 if (!(entry->flags & VMCI_QPFLAG_LOCAL))
2407                         qp_detatch_hypercall(entry->handle);
2408
2409                 /* We cannot fail the exit, so let's reset ref_count. */
2410                 entry->ref_count = 0;
2411                 qp_list_remove_entry(&qp_guest_endpoints, entry);
2412
2413                 qp_guest_endpoint_destroy(ep);
2414         }
2415
2416         mutex_unlock(&qp_guest_endpoints.mutex);
2417 }
2418
2419 /*
2420  * Helper routine that will lock the queue pair before subsequent
2421  * operations.
2422  * Note: Non-blocking on the host side is currently only implemented in ESX.
2423  * Since non-blocking isn't yet implemented on the host personality we
2424  * have no reason to acquire a spin lock.  So to avoid the use of an
2425  * unnecessary lock only acquire the mutex if we can block.
2426  */
2427 static void qp_lock(const struct vmci_qp *qpair)
2428 {
2429         qp_acquire_queue_mutex(qpair->produce_q);
2430 }
2431
2432 /*
2433  * Helper routine that unlocks the queue pair after calling
2434  * qp_lock.
2435  */
2436 static void qp_unlock(const struct vmci_qp *qpair)
2437 {
2438         qp_release_queue_mutex(qpair->produce_q);
2439 }
2440
2441 /*
2442  * The queue headers may not be mapped at all times. If a queue is
2443  * currently not mapped, it will be attempted to do so.
2444  */
2445 static int qp_map_queue_headers(struct vmci_queue *produce_q,
2446                                 struct vmci_queue *consume_q)
2447 {
2448         int result;
2449
2450         if (NULL == produce_q->q_header || NULL == consume_q->q_header) {
2451                 result = qp_host_map_queues(produce_q, consume_q);
2452                 if (result < VMCI_SUCCESS)
2453                         return (produce_q->saved_header &&
2454                                 consume_q->saved_header) ?
2455                             VMCI_ERROR_QUEUEPAIR_NOT_READY :
2456                             VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
2457         }
2458
2459         return VMCI_SUCCESS;
2460 }
2461
2462 /*
2463  * Helper routine that will retrieve the produce and consume
2464  * headers of a given queue pair. If the guest memory of the
2465  * queue pair is currently not available, the saved queue headers
2466  * will be returned, if these are available.
2467  */
2468 static int qp_get_queue_headers(const struct vmci_qp *qpair,
2469                                 struct vmci_queue_header **produce_q_header,
2470                                 struct vmci_queue_header **consume_q_header)
2471 {
2472         int result;
2473
2474         result = qp_map_queue_headers(qpair->produce_q, qpair->consume_q);
2475         if (result == VMCI_SUCCESS) {
2476                 *produce_q_header = qpair->produce_q->q_header;
2477                 *consume_q_header = qpair->consume_q->q_header;
2478         } else if (qpair->produce_q->saved_header &&
2479                    qpair->consume_q->saved_header) {
2480                 *produce_q_header = qpair->produce_q->saved_header;
2481                 *consume_q_header = qpair->consume_q->saved_header;
2482                 result = VMCI_SUCCESS;
2483         }
2484
2485         return result;
2486 }
2487
2488 /*
2489  * Callback from VMCI queue pair broker indicating that a queue
2490  * pair that was previously not ready, now either is ready or
2491  * gone forever.
2492  */
2493 static int qp_wakeup_cb(void *client_data)
2494 {
2495         struct vmci_qp *qpair = (struct vmci_qp *)client_data;
2496
2497         qp_lock(qpair);
2498         while (qpair->blocked > 0) {
2499                 qpair->blocked--;
2500                 qpair->generation++;
2501                 wake_up(&qpair->event);
2502         }
2503         qp_unlock(qpair);
2504
2505         return VMCI_SUCCESS;
2506 }
2507
2508 /*
2509  * Makes the calling thread wait for the queue pair to become
2510  * ready for host side access.  Returns true when thread is
2511  * woken up after queue pair state change, false otherwise.
2512  */
2513 static bool qp_wait_for_ready_queue(struct vmci_qp *qpair)
2514 {
2515         unsigned int generation;
2516
2517         qpair->blocked++;
2518         generation = qpair->generation;
2519         qp_unlock(qpair);
2520         wait_event(qpair->event, generation != qpair->generation);
2521         qp_lock(qpair);
2522
2523         return true;
2524 }
2525
2526 /*
2527  * Enqueues a given buffer to the produce queue using the provided
2528  * function. As many bytes as possible (space available in the queue)
2529  * are enqueued.  Assumes the queue->mutex has been acquired.  Returns
2530  * VMCI_ERROR_QUEUEPAIR_NOSPACE if no space was available to enqueue
2531  * data, VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the
2532  * queue (as defined by the queue size), VMCI_ERROR_INVALID_ARGS, if
2533  * an error occured when accessing the buffer,
2534  * VMCI_ERROR_QUEUEPAIR_NOTATTACHED, if the queue pair pages aren't
2535  * available.  Otherwise, the number of bytes written to the queue is
2536  * returned.  Updates the tail pointer of the produce queue.
2537  */
2538 static ssize_t qp_enqueue_locked(struct vmci_queue *produce_q,
2539                                  struct vmci_queue *consume_q,
2540                                  const u64 produce_q_size,
2541                                  struct iov_iter *from)
2542 {
2543         s64 free_space;
2544         u64 tail;
2545         size_t buf_size = iov_iter_count(from);
2546         size_t written;
2547         ssize_t result;
2548
2549         result = qp_map_queue_headers(produce_q, consume_q);
2550         if (unlikely(result != VMCI_SUCCESS))
2551                 return result;
2552
2553         free_space = vmci_q_header_free_space(produce_q->q_header,
2554                                               consume_q->q_header,
2555                                               produce_q_size);
2556         if (free_space == 0)
2557                 return VMCI_ERROR_QUEUEPAIR_NOSPACE;
2558
2559         if (free_space < VMCI_SUCCESS)
2560                 return (ssize_t) free_space;
2561
2562         written = (size_t) (free_space > buf_size ? buf_size : free_space);
2563         tail = vmci_q_header_producer_tail(produce_q->q_header);
2564         if (likely(tail + written < produce_q_size)) {
2565                 result = qp_memcpy_to_queue_iter(produce_q, tail, from, written);
2566         } else {
2567                 /* Tail pointer wraps around. */
2568
2569                 const size_t tmp = (size_t) (produce_q_size - tail);
2570
2571                 result = qp_memcpy_to_queue_iter(produce_q, tail, from, tmp);
2572                 if (result >= VMCI_SUCCESS)
2573                         result = qp_memcpy_to_queue_iter(produce_q, 0, from,
2574                                                  written - tmp);
2575         }
2576
2577         if (result < VMCI_SUCCESS)
2578                 return result;
2579
2580         /*
2581          * This virt_wmb() ensures that data written to the queue
2582          * is observable before the new producer_tail is.
2583          */
2584         virt_wmb();
2585
2586         vmci_q_header_add_producer_tail(produce_q->q_header, written,
2587                                         produce_q_size);
2588         return written;
2589 }
2590
2591 /*
2592  * Dequeues data (if available) from the given consume queue. Writes data
2593  * to the user provided buffer using the provided function.
2594  * Assumes the queue->mutex has been acquired.
2595  * Results:
2596  * VMCI_ERROR_QUEUEPAIR_NODATA if no data was available to dequeue.
2597  * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue
2598  * (as defined by the queue size).
2599  * VMCI_ERROR_INVALID_ARGS, if an error occured when accessing the buffer.
2600  * Otherwise the number of bytes dequeued is returned.
2601  * Side effects:
2602  * Updates the head pointer of the consume queue.
2603  */
2604 static ssize_t qp_dequeue_locked(struct vmci_queue *produce_q,
2605                                  struct vmci_queue *consume_q,
2606                                  const u64 consume_q_size,
2607                                  struct iov_iter *to,
2608                                  bool update_consumer)
2609 {
2610         size_t buf_size = iov_iter_count(to);
2611         s64 buf_ready;
2612         u64 head;
2613         size_t read;
2614         ssize_t result;
2615
2616         result = qp_map_queue_headers(produce_q, consume_q);
2617         if (unlikely(result != VMCI_SUCCESS))
2618                 return result;
2619
2620         buf_ready = vmci_q_header_buf_ready(consume_q->q_header,
2621                                             produce_q->q_header,
2622                                             consume_q_size);
2623         if (buf_ready == 0)
2624                 return VMCI_ERROR_QUEUEPAIR_NODATA;
2625
2626         if (buf_ready < VMCI_SUCCESS)
2627                 return (ssize_t) buf_ready;
2628
2629         /*
2630          * This virt_rmb() ensures that data from the queue will be read
2631          * after we have determined how much is ready to be consumed.
2632          */
2633         virt_rmb();
2634
2635         read = (size_t) (buf_ready > buf_size ? buf_size : buf_ready);
2636         head = vmci_q_header_consumer_head(produce_q->q_header);
2637         if (likely(head + read < consume_q_size)) {
2638                 result = qp_memcpy_from_queue_iter(to, consume_q, head, read);
2639         } else {
2640                 /* Head pointer wraps around. */
2641
2642                 const size_t tmp = (size_t) (consume_q_size - head);
2643
2644                 result = qp_memcpy_from_queue_iter(to, consume_q, head, tmp);
2645                 if (result >= VMCI_SUCCESS)
2646                         result = qp_memcpy_from_queue_iter(to, consume_q, 0,
2647                                                    read - tmp);
2648
2649         }
2650
2651         if (result < VMCI_SUCCESS)
2652                 return result;
2653
2654         if (update_consumer)
2655                 vmci_q_header_add_consumer_head(produce_q->q_header,
2656                                                 read, consume_q_size);
2657
2658         return read;
2659 }
2660
2661 /*
2662  * vmci_qpair_alloc() - Allocates a queue pair.
2663  * @qpair:      Pointer for the new vmci_qp struct.
2664  * @handle:     Handle to track the resource.
2665  * @produce_qsize:      Desired size of the producer queue.
2666  * @consume_qsize:      Desired size of the consumer queue.
2667  * @peer:       ContextID of the peer.
2668  * @flags:      VMCI flags.
2669  * @priv_flags: VMCI priviledge flags.
2670  *
2671  * This is the client interface for allocating the memory for a
2672  * vmci_qp structure and then attaching to the underlying
2673  * queue.  If an error occurs allocating the memory for the
2674  * vmci_qp structure no attempt is made to attach.  If an
2675  * error occurs attaching, then the structure is freed.
2676  */
2677 int vmci_qpair_alloc(struct vmci_qp **qpair,
2678                      struct vmci_handle *handle,
2679                      u64 produce_qsize,
2680                      u64 consume_qsize,
2681                      u32 peer,
2682                      u32 flags,
2683                      u32 priv_flags)
2684 {
2685         struct vmci_qp *my_qpair;
2686         int retval;
2687         struct vmci_handle src = VMCI_INVALID_HANDLE;
2688         struct vmci_handle dst = vmci_make_handle(peer, VMCI_INVALID_ID);
2689         enum vmci_route route;
2690         vmci_event_release_cb wakeup_cb;
2691         void *client_data;
2692
2693         /*
2694          * Restrict the size of a queuepair.  The device already
2695          * enforces a limit on the total amount of memory that can be
2696          * allocated to queuepairs for a guest.  However, we try to
2697          * allocate this memory before we make the queuepair
2698          * allocation hypercall.  On Linux, we allocate each page
2699          * separately, which means rather than fail, the guest will
2700          * thrash while it tries to allocate, and will become
2701          * increasingly unresponsive to the point where it appears to
2702          * be hung.  So we place a limit on the size of an individual
2703          * queuepair here, and leave the device to enforce the
2704          * restriction on total queuepair memory.  (Note that this
2705          * doesn't prevent all cases; a user with only this much
2706          * physical memory could still get into trouble.)  The error
2707          * used by the device is NO_RESOURCES, so use that here too.
2708          */
2709
2710         if (!QP_SIZES_ARE_VALID(produce_qsize, consume_qsize))
2711                 return VMCI_ERROR_NO_RESOURCES;
2712
2713         retval = vmci_route(&src, &dst, false, &route);
2714         if (retval < VMCI_SUCCESS)
2715                 route = vmci_guest_code_active() ?
2716                     VMCI_ROUTE_AS_GUEST : VMCI_ROUTE_AS_HOST;
2717
2718         if (flags & (VMCI_QPFLAG_NONBLOCK | VMCI_QPFLAG_PINNED)) {
2719                 pr_devel("NONBLOCK OR PINNED set");
2720                 return VMCI_ERROR_INVALID_ARGS;
2721         }
2722
2723         my_qpair = kzalloc(sizeof(*my_qpair), GFP_KERNEL);
2724         if (!my_qpair)
2725                 return VMCI_ERROR_NO_MEM;
2726
2727         my_qpair->produce_q_size = produce_qsize;
2728         my_qpair->consume_q_size = consume_qsize;
2729         my_qpair->peer = peer;
2730         my_qpair->flags = flags;
2731         my_qpair->priv_flags = priv_flags;
2732
2733         wakeup_cb = NULL;
2734         client_data = NULL;
2735
2736         if (VMCI_ROUTE_AS_HOST == route) {
2737                 my_qpair->guest_endpoint = false;
2738                 if (!(flags & VMCI_QPFLAG_LOCAL)) {
2739                         my_qpair->blocked = 0;
2740                         my_qpair->generation = 0;
2741                         init_waitqueue_head(&my_qpair->event);
2742                         wakeup_cb = qp_wakeup_cb;
2743                         client_data = (void *)my_qpair;
2744                 }
2745         } else {
2746                 my_qpair->guest_endpoint = true;
2747         }
2748
2749         retval = vmci_qp_alloc(handle,
2750                                &my_qpair->produce_q,
2751                                my_qpair->produce_q_size,
2752                                &my_qpair->consume_q,
2753                                my_qpair->consume_q_size,
2754                                my_qpair->peer,
2755                                my_qpair->flags,
2756                                my_qpair->priv_flags,
2757                                my_qpair->guest_endpoint,
2758                                wakeup_cb, client_data);
2759
2760         if (retval < VMCI_SUCCESS) {
2761                 kfree(my_qpair);
2762                 return retval;
2763         }
2764
2765         *qpair = my_qpair;
2766         my_qpair->handle = *handle;
2767
2768         return retval;
2769 }
2770 EXPORT_SYMBOL_GPL(vmci_qpair_alloc);
2771
2772 /*
2773  * vmci_qpair_detach() - Detatches the client from a queue pair.
2774  * @qpair:      Reference of a pointer to the qpair struct.
2775  *
2776  * This is the client interface for detaching from a VMCIQPair.
2777  * Note that this routine will free the memory allocated for the
2778  * vmci_qp structure too.
2779  */
2780 int vmci_qpair_detach(struct vmci_qp **qpair)
2781 {
2782         int result;
2783         struct vmci_qp *old_qpair;
2784
2785         if (!qpair || !(*qpair))
2786                 return VMCI_ERROR_INVALID_ARGS;
2787
2788         old_qpair = *qpair;
2789         result = qp_detatch(old_qpair->handle, old_qpair->guest_endpoint);
2790
2791         /*
2792          * The guest can fail to detach for a number of reasons, and
2793          * if it does so, it will cleanup the entry (if there is one).
2794          * The host can fail too, but it won't cleanup the entry
2795          * immediately, it will do that later when the context is
2796          * freed.  Either way, we need to release the qpair struct
2797          * here; there isn't much the caller can do, and we don't want
2798          * to leak.
2799          */
2800
2801         memset(old_qpair, 0, sizeof(*old_qpair));
2802         old_qpair->handle = VMCI_INVALID_HANDLE;
2803         old_qpair->peer = VMCI_INVALID_ID;
2804         kfree(old_qpair);
2805         *qpair = NULL;
2806
2807         return result;
2808 }
2809 EXPORT_SYMBOL_GPL(vmci_qpair_detach);
2810
2811 /*
2812  * vmci_qpair_get_produce_indexes() - Retrieves the indexes of the producer.
2813  * @qpair:      Pointer to the queue pair struct.
2814  * @producer_tail:      Reference used for storing producer tail index.
2815  * @consumer_head:      Reference used for storing the consumer head index.
2816  *
2817  * This is the client interface for getting the current indexes of the
2818  * QPair from the point of the view of the caller as the producer.
2819  */
2820 int vmci_qpair_get_produce_indexes(const struct vmci_qp *qpair,
2821                                    u64 *producer_tail,
2822                                    u64 *consumer_head)
2823 {
2824         struct vmci_queue_header *produce_q_header;
2825         struct vmci_queue_header *consume_q_header;
2826         int result;
2827
2828         if (!qpair)
2829                 return VMCI_ERROR_INVALID_ARGS;
2830
2831         qp_lock(qpair);
2832         result =
2833             qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
2834         if (result == VMCI_SUCCESS)
2835                 vmci_q_header_get_pointers(produce_q_header, consume_q_header,
2836                                            producer_tail, consumer_head);
2837         qp_unlock(qpair);
2838
2839         if (result == VMCI_SUCCESS &&
2840             ((producer_tail && *producer_tail >= qpair->produce_q_size) ||
2841              (consumer_head && *consumer_head >= qpair->produce_q_size)))
2842                 return VMCI_ERROR_INVALID_SIZE;
2843
2844         return result;
2845 }
2846 EXPORT_SYMBOL_GPL(vmci_qpair_get_produce_indexes);
2847
2848 /*
2849  * vmci_qpair_get_consume_indexes() - Retrieves the indexes of the consumer.
2850  * @qpair:      Pointer to the queue pair struct.
2851  * @consumer_tail:      Reference used for storing consumer tail index.
2852  * @producer_head:      Reference used for storing the producer head index.
2853  *
2854  * This is the client interface for getting the current indexes of the
2855  * QPair from the point of the view of the caller as the consumer.
2856  */
2857 int vmci_qpair_get_consume_indexes(const struct vmci_qp *qpair,
2858                                    u64 *consumer_tail,
2859                                    u64 *producer_head)
2860 {
2861         struct vmci_queue_header *produce_q_header;
2862         struct vmci_queue_header *consume_q_header;
2863         int result;
2864
2865         if (!qpair)
2866                 return VMCI_ERROR_INVALID_ARGS;
2867
2868         qp_lock(qpair);
2869         result =
2870             qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
2871         if (result == VMCI_SUCCESS)
2872                 vmci_q_header_get_pointers(consume_q_header, produce_q_header,
2873                                            consumer_tail, producer_head);
2874         qp_unlock(qpair);
2875
2876         if (result == VMCI_SUCCESS &&
2877             ((consumer_tail && *consumer_tail >= qpair->consume_q_size) ||
2878              (producer_head && *producer_head >= qpair->consume_q_size)))
2879                 return VMCI_ERROR_INVALID_SIZE;
2880
2881         return result;
2882 }
2883 EXPORT_SYMBOL_GPL(vmci_qpair_get_consume_indexes);
2884
2885 /*
2886  * vmci_qpair_produce_free_space() - Retrieves free space in producer queue.
2887  * @qpair:      Pointer to the queue pair struct.
2888  *
2889  * This is the client interface for getting the amount of free
2890  * space in the QPair from the point of the view of the caller as
2891  * the producer which is the common case.  Returns < 0 if err, else
2892  * available bytes into which data can be enqueued if > 0.
2893  */
2894 s64 vmci_qpair_produce_free_space(const struct vmci_qp *qpair)
2895 {
2896         struct vmci_queue_header *produce_q_header;
2897         struct vmci_queue_header *consume_q_header;
2898         s64 result;
2899
2900         if (!qpair)
2901                 return VMCI_ERROR_INVALID_ARGS;
2902
2903         qp_lock(qpair);
2904         result =
2905             qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
2906         if (result == VMCI_SUCCESS)
2907                 result = vmci_q_header_free_space(produce_q_header,
2908                                                   consume_q_header,
2909                                                   qpair->produce_q_size);
2910         else
2911                 result = 0;
2912
2913         qp_unlock(qpair);
2914
2915         return result;
2916 }
2917 EXPORT_SYMBOL_GPL(vmci_qpair_produce_free_space);
2918
2919 /*
2920  * vmci_qpair_consume_free_space() - Retrieves free space in consumer queue.
2921  * @qpair:      Pointer to the queue pair struct.
2922  *
2923  * This is the client interface for getting the amount of free
2924  * space in the QPair from the point of the view of the caller as
2925  * the consumer which is not the common case.  Returns < 0 if err, else
2926  * available bytes into which data can be enqueued if > 0.
2927  */
2928 s64 vmci_qpair_consume_free_space(const struct vmci_qp *qpair)
2929 {
2930         struct vmci_queue_header *produce_q_header;
2931         struct vmci_queue_header *consume_q_header;
2932         s64 result;
2933
2934         if (!qpair)
2935                 return VMCI_ERROR_INVALID_ARGS;
2936
2937         qp_lock(qpair);
2938         result =
2939             qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
2940         if (result == VMCI_SUCCESS)
2941                 result = vmci_q_header_free_space(consume_q_header,
2942                                                   produce_q_header,
2943                                                   qpair->consume_q_size);
2944         else
2945                 result = 0;
2946
2947         qp_unlock(qpair);
2948
2949         return result;
2950 }
2951 EXPORT_SYMBOL_GPL(vmci_qpair_consume_free_space);
2952
2953 /*
2954  * vmci_qpair_produce_buf_ready() - Gets bytes ready to read from
2955  * producer queue.
2956  * @qpair:      Pointer to the queue pair struct.
2957  *
2958  * This is the client interface for getting the amount of
2959  * enqueued data in the QPair from the point of the view of the
2960  * caller as the producer which is not the common case.  Returns < 0 if err,
2961  * else available bytes that may be read.
2962  */
2963 s64 vmci_qpair_produce_buf_ready(const struct vmci_qp *qpair)
2964 {
2965         struct vmci_queue_header *produce_q_header;
2966         struct vmci_queue_header *consume_q_header;
2967         s64 result;
2968
2969         if (!qpair)
2970                 return VMCI_ERROR_INVALID_ARGS;
2971
2972         qp_lock(qpair);
2973         result =
2974             qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
2975         if (result == VMCI_SUCCESS)
2976                 result = vmci_q_header_buf_ready(produce_q_header,
2977                                                  consume_q_header,
2978                                                  qpair->produce_q_size);
2979         else
2980                 result = 0;
2981
2982         qp_unlock(qpair);
2983
2984         return result;
2985 }
2986 EXPORT_SYMBOL_GPL(vmci_qpair_produce_buf_ready);
2987
2988 /*
2989  * vmci_qpair_consume_buf_ready() - Gets bytes ready to read from
2990  * consumer queue.
2991  * @qpair:      Pointer to the queue pair struct.
2992  *
2993  * This is the client interface for getting the amount of
2994  * enqueued data in the QPair from the point of the view of the
2995  * caller as the consumer which is the normal case.  Returns < 0 if err,
2996  * else available bytes that may be read.
2997  */
2998 s64 vmci_qpair_consume_buf_ready(const struct vmci_qp *qpair)
2999 {
3000         struct vmci_queue_header *produce_q_header;
3001         struct vmci_queue_header *consume_q_header;
3002         s64 result;
3003
3004         if (!qpair)
3005                 return VMCI_ERROR_INVALID_ARGS;
3006
3007         qp_lock(qpair);
3008         result =
3009             qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
3010         if (result == VMCI_SUCCESS)
3011                 result = vmci_q_header_buf_ready(consume_q_header,
3012                                                  produce_q_header,
3013                                                  qpair->consume_q_size);
3014         else
3015                 result = 0;
3016
3017         qp_unlock(qpair);
3018
3019         return result;
3020 }
3021 EXPORT_SYMBOL_GPL(vmci_qpair_consume_buf_ready);
3022
3023 /*
3024  * vmci_qpair_enqueue() - Throw data on the queue.
3025  * @qpair:      Pointer to the queue pair struct.
3026  * @buf:        Pointer to buffer containing data
3027  * @buf_size:   Length of buffer.
3028  * @buf_type:   Buffer type (Unused).
3029  *
3030  * This is the client interface for enqueueing data into the queue.
3031  * Returns number of bytes enqueued or < 0 on error.
3032  */
3033 ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair,
3034                            const void *buf,
3035                            size_t buf_size,
3036                            int buf_type)
3037 {
3038         ssize_t result;
3039         struct iov_iter from;
3040         struct kvec v = {.iov_base = (void *)buf, .iov_len = buf_size};
3041
3042         if (!qpair || !buf)
3043                 return VMCI_ERROR_INVALID_ARGS;
3044
3045         iov_iter_kvec(&from, WRITE, &v, 1, buf_size);
3046
3047         qp_lock(qpair);
3048
3049         do {
3050                 result = qp_enqueue_locked(qpair->produce_q,
3051                                            qpair->consume_q,
3052                                            qpair->produce_q_size,
3053                                            &from);
3054
3055                 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3056                     !qp_wait_for_ready_queue(qpair))
3057                         result = VMCI_ERROR_WOULD_BLOCK;
3058
3059         } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3060
3061         qp_unlock(qpair);
3062
3063         return result;
3064 }
3065 EXPORT_SYMBOL_GPL(vmci_qpair_enqueue);
3066
3067 /*
3068  * vmci_qpair_dequeue() - Get data from the queue.
3069  * @qpair:      Pointer to the queue pair struct.
3070  * @buf:        Pointer to buffer for the data
3071  * @buf_size:   Length of buffer.
3072  * @buf_type:   Buffer type (Unused).
3073  *
3074  * This is the client interface for dequeueing data from the queue.
3075  * Returns number of bytes dequeued or < 0 on error.
3076  */
3077 ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair,
3078                            void *buf,
3079                            size_t buf_size,
3080                            int buf_type)
3081 {
3082         ssize_t result;
3083         struct iov_iter to;
3084         struct kvec v = {.iov_base = buf, .iov_len = buf_size};
3085
3086         if (!qpair || !buf)
3087                 return VMCI_ERROR_INVALID_ARGS;
3088
3089         iov_iter_kvec(&to, READ, &v, 1, buf_size);
3090
3091         qp_lock(qpair);
3092
3093         do {
3094                 result = qp_dequeue_locked(qpair->produce_q,
3095                                            qpair->consume_q,
3096                                            qpair->consume_q_size,
3097                                            &to, true);
3098
3099                 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3100                     !qp_wait_for_ready_queue(qpair))
3101                         result = VMCI_ERROR_WOULD_BLOCK;
3102
3103         } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3104
3105         qp_unlock(qpair);
3106
3107         return result;
3108 }
3109 EXPORT_SYMBOL_GPL(vmci_qpair_dequeue);
3110
3111 /*
3112  * vmci_qpair_peek() - Peek at the data in the queue.
3113  * @qpair:      Pointer to the queue pair struct.
3114  * @buf:        Pointer to buffer for the data
3115  * @buf_size:   Length of buffer.
3116  * @buf_type:   Buffer type (Unused on Linux).
3117  *
3118  * This is the client interface for peeking into a queue.  (I.e.,
3119  * copy data from the queue without updating the head pointer.)
3120  * Returns number of bytes dequeued or < 0 on error.
3121  */
3122 ssize_t vmci_qpair_peek(struct vmci_qp *qpair,
3123                         void *buf,
3124                         size_t buf_size,
3125                         int buf_type)
3126 {
3127         struct iov_iter to;
3128         struct kvec v = {.iov_base = buf, .iov_len = buf_size};
3129         ssize_t result;
3130
3131         if (!qpair || !buf)
3132                 return VMCI_ERROR_INVALID_ARGS;
3133
3134         iov_iter_kvec(&to, READ, &v, 1, buf_size);
3135
3136         qp_lock(qpair);
3137
3138         do {
3139                 result = qp_dequeue_locked(qpair->produce_q,
3140                                            qpair->consume_q,
3141                                            qpair->consume_q_size,
3142                                            &to, false);
3143
3144                 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3145                     !qp_wait_for_ready_queue(qpair))
3146                         result = VMCI_ERROR_WOULD_BLOCK;
3147
3148         } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3149
3150         qp_unlock(qpair);
3151
3152         return result;
3153 }
3154 EXPORT_SYMBOL_GPL(vmci_qpair_peek);
3155
3156 /*
3157  * vmci_qpair_enquev() - Throw data on the queue using iov.
3158  * @qpair:      Pointer to the queue pair struct.
3159  * @iov:        Pointer to buffer containing data
3160  * @iov_size:   Length of buffer.
3161  * @buf_type:   Buffer type (Unused).
3162  *
3163  * This is the client interface for enqueueing data into the queue.
3164  * This function uses IO vectors to handle the work. Returns number
3165  * of bytes enqueued or < 0 on error.
3166  */
3167 ssize_t vmci_qpair_enquev(struct vmci_qp *qpair,
3168                           struct msghdr *msg,
3169                           size_t iov_size,
3170                           int buf_type)
3171 {
3172         ssize_t result;
3173
3174         if (!qpair)
3175                 return VMCI_ERROR_INVALID_ARGS;
3176
3177         qp_lock(qpair);
3178
3179         do {
3180                 result = qp_enqueue_locked(qpair->produce_q,
3181                                            qpair->consume_q,
3182                                            qpair->produce_q_size,
3183                                            &msg->msg_iter);
3184
3185                 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3186                     !qp_wait_for_ready_queue(qpair))
3187                         result = VMCI_ERROR_WOULD_BLOCK;
3188
3189         } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3190
3191         qp_unlock(qpair);
3192
3193         return result;
3194 }
3195 EXPORT_SYMBOL_GPL(vmci_qpair_enquev);
3196
3197 /*
3198  * vmci_qpair_dequev() - Get data from the queue using iov.
3199  * @qpair:      Pointer to the queue pair struct.
3200  * @iov:        Pointer to buffer for the data
3201  * @iov_size:   Length of buffer.
3202  * @buf_type:   Buffer type (Unused).
3203  *
3204  * This is the client interface for dequeueing data from the queue.
3205  * This function uses IO vectors to handle the work. Returns number
3206  * of bytes dequeued or < 0 on error.
3207  */
3208 ssize_t vmci_qpair_dequev(struct vmci_qp *qpair,
3209                           struct msghdr *msg,
3210                           size_t iov_size,
3211                           int buf_type)
3212 {
3213         ssize_t result;
3214
3215         if (!qpair)
3216                 return VMCI_ERROR_INVALID_ARGS;
3217
3218         qp_lock(qpair);
3219
3220         do {
3221                 result = qp_dequeue_locked(qpair->produce_q,
3222                                            qpair->consume_q,
3223                                            qpair->consume_q_size,
3224                                            &msg->msg_iter, true);
3225
3226                 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3227                     !qp_wait_for_ready_queue(qpair))
3228                         result = VMCI_ERROR_WOULD_BLOCK;
3229
3230         } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3231
3232         qp_unlock(qpair);
3233
3234         return result;
3235 }
3236 EXPORT_SYMBOL_GPL(vmci_qpair_dequev);
3237
3238 /*
3239  * vmci_qpair_peekv() - Peek at the data in the queue using iov.
3240  * @qpair:      Pointer to the queue pair struct.
3241  * @iov:        Pointer to buffer for the data
3242  * @iov_size:   Length of buffer.
3243  * @buf_type:   Buffer type (Unused on Linux).
3244  *
3245  * This is the client interface for peeking into a queue.  (I.e.,
3246  * copy data from the queue without updating the head pointer.)
3247  * This function uses IO vectors to handle the work. Returns number
3248  * of bytes peeked or < 0 on error.
3249  */
3250 ssize_t vmci_qpair_peekv(struct vmci_qp *qpair,
3251                          struct msghdr *msg,
3252                          size_t iov_size,
3253                          int buf_type)
3254 {
3255         ssize_t result;
3256
3257         if (!qpair)
3258                 return VMCI_ERROR_INVALID_ARGS;
3259
3260         qp_lock(qpair);
3261
3262         do {
3263                 result = qp_dequeue_locked(qpair->produce_q,
3264                                            qpair->consume_q,
3265                                            qpair->consume_q_size,
3266                                            &msg->msg_iter, false);
3267
3268                 if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
3269                     !qp_wait_for_ready_queue(qpair))
3270                         result = VMCI_ERROR_WOULD_BLOCK;
3271
3272         } while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
3273
3274         qp_unlock(qpair);
3275         return result;
3276 }
3277 EXPORT_SYMBOL_GPL(vmci_qpair_peekv);