vfs: do bulk POLL* -> EPOLL* replacement
[linux-2.6-microblaze.git] / drivers / misc / mic / vop / vop_vringh.c
/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2016 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Intel Virtio Over PCIe (VOP) driver.
 *
 */
21 #include <linux/sched.h>
22 #include <linux/poll.h>
23 #include <linux/dma-mapping.h>
24
25 #include <linux/mic_common.h>
26 #include "../common/mic_dev.h"
27
28 #include <linux/mic_ioctl.h>
29 #include "vop_main.h"
30
31 /* Helper API to obtain the VOP PCIe device */
32 static inline struct device *vop_dev(struct vop_vdev *vdev)
33 {
34         return vdev->vpdev->dev.parent;
35 }
36
37 /* Helper API to check if a virtio device is initialized */
38 static inline int vop_vdev_inited(struct vop_vdev *vdev)
39 {
40         if (!vdev)
41                 return -EINVAL;
42         /* Device has not been created yet */
43         if (!vdev->dd || !vdev->dd->type) {
44                 dev_err(vop_dev(vdev), "%s %d err %d\n",
45                         __func__, __LINE__, -EINVAL);
46                 return -EINVAL;
47         }
48         /* Device has been removed/deleted */
49         if (vdev->dd->type == -1) {
50                 dev_dbg(vop_dev(vdev), "%s %d err %d\n",
51                         __func__, __LINE__, -ENODEV);
52                 return -ENODEV;
53         }
54         return 0;
55 }
56
57 static void _vop_notify(struct vringh *vrh)
58 {
59         struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh);
60         struct vop_vdev *vdev = vvrh->vdev;
61         struct vop_device *vpdev = vdev->vpdev;
62         s8 db = vdev->dc->h2c_vdev_db;
63
64         if (db != -1)
65                 vpdev->hw_ops->send_intr(vpdev, db);
66 }
67
68 static void vop_virtio_init_post(struct vop_vdev *vdev)
69 {
70         struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
71         struct vop_device *vpdev = vdev->vpdev;
72         int i, used_size;
73
74         for (i = 0; i < vdev->dd->num_vq; i++) {
75                 used_size = PAGE_ALIGN(sizeof(u16) * 3 +
76                                 sizeof(struct vring_used_elem) *
77                                 le16_to_cpu(vqconfig->num));
78                 if (!le64_to_cpu(vqconfig[i].used_address)) {
79                         dev_warn(vop_dev(vdev), "used_address zero??\n");
80                         continue;
81                 }
82                 vdev->vvr[i].vrh.vring.used =
83                         (void __force *)vpdev->hw_ops->ioremap(
84                         vpdev,
85                         le64_to_cpu(vqconfig[i].used_address),
86                         used_size);
87         }
88
89         vdev->dc->used_address_updated = 0;
90
91         dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n",
92                  __func__, vdev->virtio_id);
93 }
94
95 static inline void vop_virtio_device_reset(struct vop_vdev *vdev)
96 {
97         int i;
98
99         dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n",
100                 __func__, vdev->dd->status, vdev->virtio_id);
101
102         for (i = 0; i < vdev->dd->num_vq; i++)
103                 /*
104                  * Avoid lockdep false positive. The + 1 is for the vop
105                  * mutex which is held in the reset devices code path.
106                  */
107                 mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
108
109         /* 0 status means "reset" */
110         vdev->dd->status = 0;
111         vdev->dc->vdev_reset = 0;
112         vdev->dc->host_ack = 1;
113
114         for (i = 0; i < vdev->dd->num_vq; i++) {
115                 struct vringh *vrh = &vdev->vvr[i].vrh;
116
117                 vdev->vvr[i].vring.info->avail_idx = 0;
118                 vrh->completed = 0;
119                 vrh->last_avail_idx = 0;
120                 vrh->last_used_idx = 0;
121         }
122
123         for (i = 0; i < vdev->dd->num_vq; i++)
124                 mutex_unlock(&vdev->vvr[i].vr_mutex);
125 }
126
127 static void vop_virtio_reset_devices(struct vop_info *vi)
128 {
129         struct list_head *pos, *tmp;
130         struct vop_vdev *vdev;
131
132         list_for_each_safe(pos, tmp, &vi->vdev_list) {
133                 vdev = list_entry(pos, struct vop_vdev, list);
134                 vop_virtio_device_reset(vdev);
135                 vdev->poll_wake = 1;
136                 wake_up(&vdev->waitq);
137         }
138 }
139
140 static void vop_bh_handler(struct work_struct *work)
141 {
142         struct vop_vdev *vdev = container_of(work, struct vop_vdev,
143                         virtio_bh_work);
144
145         if (vdev->dc->used_address_updated)
146                 vop_virtio_init_post(vdev);
147
148         if (vdev->dc->vdev_reset)
149                 vop_virtio_device_reset(vdev);
150
151         vdev->poll_wake = 1;
152         wake_up(&vdev->waitq);
153 }
154
155 static irqreturn_t _vop_virtio_intr_handler(int irq, void *data)
156 {
157         struct vop_vdev *vdev = data;
158         struct vop_device *vpdev = vdev->vpdev;
159
160         vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db);
161         schedule_work(&vdev->virtio_bh_work);
162         return IRQ_HANDLED;
163 }
164
165 static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp)
166 {
167         DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
168         int ret = 0, retry, i;
169         struct vop_device *vpdev = vdev->vpdev;
170         struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
171         struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
172         s8 db = bootparam->h2c_config_db;
173
174         mutex_lock(&vi->vop_mutex);
175         for (i = 0; i < vdev->dd->num_vq; i++)
176                 mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
177
178         if (db == -1 || vdev->dd->type == -1) {
179                 ret = -EIO;
180                 goto exit;
181         }
182
183         memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len);
184         vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
185         vpdev->hw_ops->send_intr(vpdev, db);
186
187         for (retry = 100; retry--;) {
188                 ret = wait_event_timeout(wake, vdev->dc->guest_ack,
189                                          msecs_to_jiffies(100));
190                 if (ret)
191                         break;
192         }
193
194         dev_dbg(vop_dev(vdev),
195                 "%s %d retry: %d\n", __func__, __LINE__, retry);
196         vdev->dc->config_change = 0;
197         vdev->dc->guest_ack = 0;
198 exit:
199         for (i = 0; i < vdev->dd->num_vq; i++)
200                 mutex_unlock(&vdev->vvr[i].vr_mutex);
201         mutex_unlock(&vi->vop_mutex);
202         return ret;
203 }
204
205 static int vop_copy_dp_entry(struct vop_vdev *vdev,
206                              struct mic_device_desc *argp, __u8 *type,
207                              struct mic_device_desc **devpage)
208 {
209         struct vop_device *vpdev = vdev->vpdev;
210         struct mic_device_desc *devp;
211         struct mic_vqconfig *vqconfig;
212         int ret = 0, i;
213         bool slot_found = false;
214
215         vqconfig = mic_vq_config(argp);
216         for (i = 0; i < argp->num_vq; i++) {
217                 if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
218                         ret =  -EINVAL;
219                         dev_err(vop_dev(vdev), "%s %d err %d\n",
220                                 __func__, __LINE__, ret);
221                         goto exit;
222                 }
223         }
224
225         /* Find the first free device page entry */
226         for (i = sizeof(struct mic_bootparam);
227                 i < MIC_DP_SIZE - mic_total_desc_size(argp);
228                 i += mic_total_desc_size(devp)) {
229                 devp = vpdev->hw_ops->get_dp(vpdev) + i;
230                 if (devp->type == 0 || devp->type == -1) {
231                         slot_found = true;
232                         break;
233                 }
234         }
235         if (!slot_found) {
236                 ret =  -EINVAL;
237                 dev_err(vop_dev(vdev), "%s %d err %d\n",
238                         __func__, __LINE__, ret);
239                 goto exit;
240         }
241         /*
242          * Save off the type before doing the memcpy. Type will be set in the
243          * end after completing all initialization for the new device.
244          */
245         *type = argp->type;
246         argp->type = 0;
247         memcpy(devp, argp, mic_desc_size(argp));
248
249         *devpage = devp;
250 exit:
251         return ret;
252 }
253
254 static void vop_init_device_ctrl(struct vop_vdev *vdev,
255                                  struct mic_device_desc *devpage)
256 {
257         struct mic_device_ctrl *dc;
258
259         dc = (void *)devpage + mic_aligned_desc_size(devpage);
260
261         dc->config_change = 0;
262         dc->guest_ack = 0;
263         dc->vdev_reset = 0;
264         dc->host_ack = 0;
265         dc->used_address_updated = 0;
266         dc->c2h_vdev_db = -1;
267         dc->h2c_vdev_db = -1;
268         vdev->dc = dc;
269 }
270
271 static int vop_virtio_add_device(struct vop_vdev *vdev,
272                                  struct mic_device_desc *argp)
273 {
274         struct vop_info *vi = vdev->vi;
275         struct vop_device *vpdev = vi->vpdev;
276         struct mic_device_desc *dd = NULL;
277         struct mic_vqconfig *vqconfig;
278         int vr_size, i, j, ret;
279         u8 type = 0;
280         s8 db = -1;
281         char irqname[16];
282         struct mic_bootparam *bootparam;
283         u16 num;
284         dma_addr_t vr_addr;
285
286         bootparam = vpdev->hw_ops->get_dp(vpdev);
287         init_waitqueue_head(&vdev->waitq);
288         INIT_LIST_HEAD(&vdev->list);
289         vdev->vpdev = vpdev;
290
291         ret = vop_copy_dp_entry(vdev, argp, &type, &dd);
292         if (ret) {
293                 dev_err(vop_dev(vdev), "%s %d err %d\n",
294                         __func__, __LINE__, ret);
295                 return ret;
296         }
297
298         vop_init_device_ctrl(vdev, dd);
299
300         vdev->dd = dd;
301         vdev->virtio_id = type;
302         vqconfig = mic_vq_config(dd);
303         INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler);
304
305         for (i = 0; i < dd->num_vq; i++) {
306                 struct vop_vringh *vvr = &vdev->vvr[i];
307                 struct mic_vring *vr = &vdev->vvr[i].vring;
308
309                 num = le16_to_cpu(vqconfig[i].num);
310                 mutex_init(&vvr->vr_mutex);
311                 vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) +
312                         sizeof(struct _mic_vring_info));
313                 vr->va = (void *)
314                         __get_free_pages(GFP_KERNEL | __GFP_ZERO,
315                                          get_order(vr_size));
316                 if (!vr->va) {
317                         ret = -ENOMEM;
318                         dev_err(vop_dev(vdev), "%s %d err %d\n",
319                                 __func__, __LINE__, ret);
320                         goto err;
321                 }
322                 vr->len = vr_size;
323                 vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN);
324                 vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i);
325                 vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
326                                          DMA_BIDIRECTIONAL);
327                 if (dma_mapping_error(&vpdev->dev, vr_addr)) {
328                         free_pages((unsigned long)vr->va, get_order(vr_size));
329                         ret = -ENOMEM;
330                         dev_err(vop_dev(vdev), "%s %d err %d\n",
331                                 __func__, __LINE__, ret);
332                         goto err;
333                 }
334                 vqconfig[i].address = cpu_to_le64(vr_addr);
335
336                 vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
337                 ret = vringh_init_kern(&vvr->vrh,
338                                        *(u32 *)mic_vq_features(vdev->dd),
339                                        num, false, vr->vr.desc, vr->vr.avail,
340                                        vr->vr.used);
341                 if (ret) {
342                         dev_err(vop_dev(vdev), "%s %d err %d\n",
343                                 __func__, __LINE__, ret);
344                         goto err;
345                 }
346                 vringh_kiov_init(&vvr->riov, NULL, 0);
347                 vringh_kiov_init(&vvr->wiov, NULL, 0);
348                 vvr->head = USHRT_MAX;
349                 vvr->vdev = vdev;
350                 vvr->vrh.notify = _vop_notify;
351                 dev_dbg(&vpdev->dev,
352                         "%s %d index %d va %p info %p vr_size 0x%x\n",
353                         __func__, __LINE__, i, vr->va, vr->info, vr_size);
354                 vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
355                                         get_order(VOP_INT_DMA_BUF_SIZE));
356                 vvr->buf_da = dma_map_single(&vpdev->dev,
357                                           vvr->buf, VOP_INT_DMA_BUF_SIZE,
358                                           DMA_BIDIRECTIONAL);
359         }
360
361         snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
362                  vdev->virtio_id);
363         vdev->virtio_db = vpdev->hw_ops->next_db(vpdev);
364         vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
365                         _vop_virtio_intr_handler, irqname, vdev,
366                         vdev->virtio_db);
367         if (IS_ERR(vdev->virtio_cookie)) {
368                 ret = PTR_ERR(vdev->virtio_cookie);
369                 dev_dbg(&vpdev->dev, "request irq failed\n");
370                 goto err;
371         }
372
373         vdev->dc->c2h_vdev_db = vdev->virtio_db;
374
375         /*
376          * Order the type update with previous stores. This write barrier
377          * is paired with the corresponding read barrier before the uncached
378          * system memory read of the type, on the card while scanning the
379          * device page.
380          */
381         smp_wmb();
382         dd->type = type;
383         argp->type = type;
384
385         if (bootparam) {
386                 db = bootparam->h2c_config_db;
387                 if (db != -1)
388                         vpdev->hw_ops->send_intr(vpdev, db);
389         }
390         dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db);
391         return 0;
392 err:
393         vqconfig = mic_vq_config(dd);
394         for (j = 0; j < i; j++) {
395                 struct vop_vringh *vvr = &vdev->vvr[j];
396
397                 dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address),
398                                  vvr->vring.len, DMA_BIDIRECTIONAL);
399                 free_pages((unsigned long)vvr->vring.va,
400                            get_order(vvr->vring.len));
401         }
402         return ret;
403 }
404
405 static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp,
406                            struct vop_device *vpdev)
407 {
408         struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
409         s8 db;
410         int ret, retry;
411         DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
412
413         devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
414         db = bootparam->h2c_config_db;
415         if (db != -1)
416                 vpdev->hw_ops->send_intr(vpdev, db);
417         else
418                 goto done;
419         for (retry = 15; retry--;) {
420                 ret = wait_event_timeout(wake, devp->guest_ack,
421                                          msecs_to_jiffies(1000));
422                 if (ret)
423                         break;
424         }
425 done:
426         devp->config_change = 0;
427         devp->guest_ack = 0;
428 }
429
430 static void vop_virtio_del_device(struct vop_vdev *vdev)
431 {
432         struct vop_info *vi = vdev->vi;
433         struct vop_device *vpdev = vdev->vpdev;
434         int i;
435         struct mic_vqconfig *vqconfig;
436         struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
437
438         if (!bootparam)
439                 goto skip_hot_remove;
440         vop_dev_remove(vi, vdev->dc, vpdev);
441 skip_hot_remove:
442         vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
443         flush_work(&vdev->virtio_bh_work);
444         vqconfig = mic_vq_config(vdev->dd);
445         for (i = 0; i < vdev->dd->num_vq; i++) {
446                 struct vop_vringh *vvr = &vdev->vvr[i];
447
448                 dma_unmap_single(&vpdev->dev,
449                                  vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
450                                  DMA_BIDIRECTIONAL);
451                 free_pages((unsigned long)vvr->buf,
452                            get_order(VOP_INT_DMA_BUF_SIZE));
453                 vringh_kiov_cleanup(&vvr->riov);
454                 vringh_kiov_cleanup(&vvr->wiov);
455                 dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address),
456                                  vvr->vring.len, DMA_BIDIRECTIONAL);
457                 free_pages((unsigned long)vvr->vring.va,
458                            get_order(vvr->vring.len));
459         }
460         /*
461          * Order the type update with previous stores. This write barrier
462          * is paired with the corresponding read barrier before the uncached
463          * system memory read of the type, on the card while scanning the
464          * device page.
465          */
466         smp_wmb();
467         vdev->dd->type = -1;
468 }
469
470 /*
471  * vop_sync_dma - Wrapper for synchronous DMAs.
472  *
473  * @dev - The address of the pointer to the device instance used
474  * for DMA registration.
475  * @dst - destination DMA address.
476  * @src - source DMA address.
477  * @len - size of the transfer.
478  *
479  * Return DMA_SUCCESS on success
480  */
481 static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src,
482                         size_t len)
483 {
484         int err = 0;
485         struct dma_device *ddev;
486         struct dma_async_tx_descriptor *tx;
487         struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
488         struct dma_chan *vop_ch = vi->dma_ch;
489
490         if (!vop_ch) {
491                 err = -EBUSY;
492                 goto error;
493         }
494         ddev = vop_ch->device;
495         tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len,
496                 DMA_PREP_FENCE);
497         if (!tx) {
498                 err = -ENOMEM;
499                 goto error;
500         } else {
501                 dma_cookie_t cookie;
502
503                 cookie = tx->tx_submit(tx);
504                 if (dma_submit_error(cookie)) {
505                         err = -ENOMEM;
506                         goto error;
507                 }
508                 dma_async_issue_pending(vop_ch);
509                 err = dma_sync_wait(vop_ch, cookie);
510         }
511 error:
512         if (err)
513                 dev_err(&vi->vpdev->dev, "%s %d err %d\n",
514                         __func__, __LINE__, err);
515         return err;
516 }
517
518 #define VOP_USE_DMA true
519
520 /*
521  * Initiates the copies across the PCIe bus from card memory to a user
522  * space buffer. When transfers are done using DMA, source/destination
523  * addresses and transfer length must follow the alignment requirements of
524  * the MIC DMA engine.
525  */
526 static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
527                                    size_t len, u64 daddr, size_t dlen,
528                                    int vr_idx)
529 {
530         struct vop_device *vpdev = vdev->vpdev;
531         void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
532         struct vop_vringh *vvr = &vdev->vvr[vr_idx];
533         struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
534         size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
535         bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
536         size_t dma_offset, partlen;
537         int err;
538
539         if (!VOP_USE_DMA) {
540                 if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
541                         err = -EFAULT;
542                         dev_err(vop_dev(vdev), "%s %d err %d\n",
543                                 __func__, __LINE__, err);
544                         goto err;
545                 }
546                 vdev->in_bytes += len;
547                 err = 0;
548                 goto err;
549         }
550
551         dma_offset = daddr - round_down(daddr, dma_alignment);
552         daddr -= dma_offset;
553         len += dma_offset;
554         /*
555          * X100 uses DMA addresses as seen by the card so adding
556          * the aperture base is not required for DMA. However x200
557          * requires DMA addresses to be an offset into the bar so
558          * add the aperture base for x200.
559          */
560         if (x200)
561                 daddr += vpdev->aper->pa;
562         while (len) {
563                 partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
564                 err = vop_sync_dma(vdev, vvr->buf_da, daddr,
565                                    ALIGN(partlen, dma_alignment));
566                 if (err) {
567                         dev_err(vop_dev(vdev), "%s %d err %d\n",
568                                 __func__, __LINE__, err);
569                         goto err;
570                 }
571                 if (copy_to_user(ubuf, vvr->buf + dma_offset,
572                                  partlen - dma_offset)) {
573                         err = -EFAULT;
574                         dev_err(vop_dev(vdev), "%s %d err %d\n",
575                                 __func__, __LINE__, err);
576                         goto err;
577                 }
578                 daddr += partlen;
579                 ubuf += partlen;
580                 dbuf += partlen;
581                 vdev->in_bytes_dma += partlen;
582                 vdev->in_bytes += partlen;
583                 len -= partlen;
584                 dma_offset = 0;
585         }
586         err = 0;
587 err:
588         vpdev->hw_ops->iounmap(vpdev, dbuf);
589         dev_dbg(vop_dev(vdev),
590                 "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
591                 __func__, ubuf, dbuf, len, vr_idx);
592         return err;
593 }
594
595 /*
596  * Initiates copies across the PCIe bus from a user space buffer to card
597  * memory. When transfers are done using DMA, source/destination addresses
598  * and transfer length must follow the alignment requirements of the MIC
599  * DMA engine.
600  */
601 static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf,
602                                      size_t len, u64 daddr, size_t dlen,
603                                      int vr_idx)
604 {
605         struct vop_device *vpdev = vdev->vpdev;
606         void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
607         struct vop_vringh *vvr = &vdev->vvr[vr_idx];
608         struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
609         size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
610         bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
611         size_t partlen;
612         bool dma = VOP_USE_DMA;
613         int err = 0;
614
615         if (daddr & (dma_alignment - 1)) {
616                 vdev->tx_dst_unaligned += len;
617                 dma = false;
618         } else if (ALIGN(len, dma_alignment) > dlen) {
619                 vdev->tx_len_unaligned += len;
620                 dma = false;
621         }
622
623         if (!dma)
624                 goto memcpy;
625
626         /*
627          * X100 uses DMA addresses as seen by the card so adding
628          * the aperture base is not required for DMA. However x200
629          * requires DMA addresses to be an offset into the bar so
630          * add the aperture base for x200.
631          */
632         if (x200)
633                 daddr += vpdev->aper->pa;
634         while (len) {
635                 partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
636
637                 if (copy_from_user(vvr->buf, ubuf, partlen)) {
638                         err = -EFAULT;
639                         dev_err(vop_dev(vdev), "%s %d err %d\n",
640                                 __func__, __LINE__, err);
641                         goto err;
642                 }
643                 err = vop_sync_dma(vdev, daddr, vvr->buf_da,
644                                    ALIGN(partlen, dma_alignment));
645                 if (err) {
646                         dev_err(vop_dev(vdev), "%s %d err %d\n",
647                                 __func__, __LINE__, err);
648                         goto err;
649                 }
650                 daddr += partlen;
651                 ubuf += partlen;
652                 dbuf += partlen;
653                 vdev->out_bytes_dma += partlen;
654                 vdev->out_bytes += partlen;
655                 len -= partlen;
656         }
657 memcpy:
658         /*
659          * We are copying to IO below and should ideally use something
660          * like copy_from_user_toio(..) if it existed.
661          */
662         if (copy_from_user((void __force *)dbuf, ubuf, len)) {
663                 err = -EFAULT;
664                 dev_err(vop_dev(vdev), "%s %d err %d\n",
665                         __func__, __LINE__, err);
666                 goto err;
667         }
668         vdev->out_bytes += len;
669         err = 0;
670 err:
671         vpdev->hw_ops->iounmap(vpdev, dbuf);
672         dev_dbg(vop_dev(vdev),
673                 "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
674                 __func__, ubuf, dbuf, len, vr_idx);
675         return err;
676 }
677
678 #define MIC_VRINGH_READ true
679
680 /* Determine the total number of bytes consumed in a VRINGH KIOV */
681 static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov)
682 {
683         int i;
684         u32 total = iov->consumed;
685
686         for (i = 0; i < iov->i; i++)
687                 total += iov->iov[i].iov_len;
688         return total;
689 }
690
691 /*
692  * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
693  * This API is heavily based on the vringh_iov_xfer(..) implementation
694  * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
695  * and vringh_iov_push_kern(..) directly is because there is no
696  * way to override the VRINGH xfer(..) routines as of v3.10.
697  */
698 static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov,
699                            void __user *ubuf, size_t len, bool read, int vr_idx,
700                            size_t *out_len)
701 {
702         int ret = 0;
703         size_t partlen, tot_len = 0;
704
705         while (len && iov->i < iov->used) {
706                 struct kvec *kiov = &iov->iov[iov->i];
707
708                 partlen = min(kiov->iov_len, len);
709                 if (read)
710                         ret = vop_virtio_copy_to_user(vdev, ubuf, partlen,
711                                                       (u64)kiov->iov_base,
712                                                       kiov->iov_len,
713                                                       vr_idx);
714                 else
715                         ret = vop_virtio_copy_from_user(vdev, ubuf, partlen,
716                                                         (u64)kiov->iov_base,
717                                                         kiov->iov_len,
718                                                         vr_idx);
719                 if (ret) {
720                         dev_err(vop_dev(vdev), "%s %d err %d\n",
721                                 __func__, __LINE__, ret);
722                         break;
723                 }
724                 len -= partlen;
725                 ubuf += partlen;
726                 tot_len += partlen;
727                 iov->consumed += partlen;
728                 kiov->iov_len -= partlen;
729                 kiov->iov_base += partlen;
730                 if (!kiov->iov_len) {
731                         /* Fix up old iov element then increment. */
732                         kiov->iov_len = iov->consumed;
733                         kiov->iov_base -= iov->consumed;
734
735                         iov->consumed = 0;
736                         iov->i++;
737                 }
738         }
739         *out_len = tot_len;
740         return ret;
741 }
742
743 /*
744  * Use the standard VRINGH infrastructure in the kernel to fetch new
745  * descriptors, initiate the copies and update the used ring.
746  */
747 static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy)
748 {
749         int ret = 0;
750         u32 iovcnt = copy->iovcnt;
751         struct iovec iov;
752         struct iovec __user *u_iov = copy->iov;
753         void __user *ubuf = NULL;
754         struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx];
755         struct vringh_kiov *riov = &vvr->riov;
756         struct vringh_kiov *wiov = &vvr->wiov;
757         struct vringh *vrh = &vvr->vrh;
758         u16 *head = &vvr->head;
759         struct mic_vring *vr = &vvr->vring;
760         size_t len = 0, out_len;
761
762         copy->out_len = 0;
763         /* Fetch a new IOVEC if all previous elements have been processed */
764         if (riov->i == riov->used && wiov->i == wiov->used) {
765                 ret = vringh_getdesc_kern(vrh, riov, wiov,
766                                           head, GFP_KERNEL);
767                 /* Check if there are available descriptors */
768                 if (ret <= 0)
769                         return ret;
770         }
771         while (iovcnt) {
772                 if (!len) {
773                         /* Copy over a new iovec from user space. */
774                         ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
775                         if (ret) {
776                                 ret = -EINVAL;
777                                 dev_err(vop_dev(vdev), "%s %d err %d\n",
778                                         __func__, __LINE__, ret);
779                                 break;
780                         }
781                         len = iov.iov_len;
782                         ubuf = iov.iov_base;
783                 }
784                 /* Issue all the read descriptors first */
785                 ret = vop_vringh_copy(vdev, riov, ubuf, len,
786                                       MIC_VRINGH_READ, copy->vr_idx, &out_len);
787                 if (ret) {
788                         dev_err(vop_dev(vdev), "%s %d err %d\n",
789                                 __func__, __LINE__, ret);
790                         break;
791                 }
792                 len -= out_len;
793                 ubuf += out_len;
794                 copy->out_len += out_len;
795                 /* Issue the write descriptors next */
796                 ret = vop_vringh_copy(vdev, wiov, ubuf, len,
797                                       !MIC_VRINGH_READ, copy->vr_idx, &out_len);
798                 if (ret) {
799                         dev_err(vop_dev(vdev), "%s %d err %d\n",
800                                 __func__, __LINE__, ret);
801                         break;
802                 }
803                 len -= out_len;
804                 ubuf += out_len;
805                 copy->out_len += out_len;
806                 if (!len) {
807                         /* One user space iovec is now completed */
808                         iovcnt--;
809                         u_iov++;
810                 }
811                 /* Exit loop if all elements in KIOVs have been processed. */
812                 if (riov->i == riov->used && wiov->i == wiov->used)
813                         break;
814         }
815         /*
816          * Update the used ring if a descriptor was available and some data was
817          * copied in/out and the user asked for a used ring update.
818          */
819         if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
820                 u32 total = 0;
821
822                 /* Determine the total data consumed */
823                 total += vop_vringh_iov_consumed(riov);
824                 total += vop_vringh_iov_consumed(wiov);
825                 vringh_complete_kern(vrh, *head, total);
826                 *head = USHRT_MAX;
827                 if (vringh_need_notify_kern(vrh) > 0)
828                         vringh_notify(vrh);
829                 vringh_kiov_cleanup(riov);
830                 vringh_kiov_cleanup(wiov);
831                 /* Update avail idx for user space */
832                 vr->info->avail_idx = vrh->last_avail_idx;
833         }
834         return ret;
835 }
836
837 static inline int vop_verify_copy_args(struct vop_vdev *vdev,
838                                        struct mic_copy_desc *copy)
839 {
840         if (!vdev || copy->vr_idx >= vdev->dd->num_vq)
841                 return -EINVAL;
842         return 0;
843 }
844
845 /* Copy a specified number of virtio descriptors in a chain */
846 static int vop_virtio_copy_desc(struct vop_vdev *vdev,
847                                 struct mic_copy_desc *copy)
848 {
849         int err;
850         struct vop_vringh *vvr;
851
852         err = vop_verify_copy_args(vdev, copy);
853         if (err)
854                 return err;
855
856         vvr = &vdev->vvr[copy->vr_idx];
857         mutex_lock(&vvr->vr_mutex);
858         if (!vop_vdevup(vdev)) {
859                 err = -ENODEV;
860                 dev_err(vop_dev(vdev), "%s %d err %d\n",
861                         __func__, __LINE__, err);
862                 goto err;
863         }
864         err = _vop_virtio_copy(vdev, copy);
865         if (err) {
866                 dev_err(vop_dev(vdev), "%s %d err %d\n",
867                         __func__, __LINE__, err);
868         }
869 err:
870         mutex_unlock(&vvr->vr_mutex);
871         return err;
872 }
873
874 static int vop_open(struct inode *inode, struct file *f)
875 {
876         struct vop_vdev *vdev;
877         struct vop_info *vi = container_of(f->private_data,
878                 struct vop_info, miscdev);
879
880         vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
881         if (!vdev)
882                 return -ENOMEM;
883         vdev->vi = vi;
884         mutex_init(&vdev->vdev_mutex);
885         f->private_data = vdev;
886         init_completion(&vdev->destroy);
887         complete(&vdev->destroy);
888         return 0;
889 }
890
891 static int vop_release(struct inode *inode, struct file *f)
892 {
893         struct vop_vdev *vdev = f->private_data, *vdev_tmp;
894         struct vop_info *vi = vdev->vi;
895         struct list_head *pos, *tmp;
896         bool found = false;
897
898         mutex_lock(&vdev->vdev_mutex);
899         if (vdev->deleted)
900                 goto unlock;
901         mutex_lock(&vi->vop_mutex);
902         list_for_each_safe(pos, tmp, &vi->vdev_list) {
903                 vdev_tmp = list_entry(pos, struct vop_vdev, list);
904                 if (vdev == vdev_tmp) {
905                         vop_virtio_del_device(vdev);
906                         list_del(pos);
907                         found = true;
908                         break;
909                 }
910         }
911         mutex_unlock(&vi->vop_mutex);
912 unlock:
913         mutex_unlock(&vdev->vdev_mutex);
914         if (!found)
915                 wait_for_completion(&vdev->destroy);
916         f->private_data = NULL;
917         kfree(vdev);
918         return 0;
919 }
920
921 static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
922 {
923         struct vop_vdev *vdev = f->private_data;
924         struct vop_info *vi = vdev->vi;
925         void __user *argp = (void __user *)arg;
926         int ret;
927
928         switch (cmd) {
929         case MIC_VIRTIO_ADD_DEVICE:
930         {
931                 struct mic_device_desc dd, *dd_config;
932
933                 if (copy_from_user(&dd, argp, sizeof(dd)))
934                         return -EFAULT;
935
936                 if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
937                     dd.num_vq > MIC_MAX_VRINGS)
938                         return -EINVAL;
939
940                 dd_config = memdup_user(argp, mic_desc_size(&dd));
941                 if (IS_ERR(dd_config))
942                         return PTR_ERR(dd_config);
943
944                 /* Ensure desc has not changed between the two reads */
945                 if (memcmp(&dd, dd_config, sizeof(dd))) {
946                         ret = -EINVAL;
947                         goto free_ret;
948                 }
949                 mutex_lock(&vdev->vdev_mutex);
950                 mutex_lock(&vi->vop_mutex);
951                 ret = vop_virtio_add_device(vdev, dd_config);
952                 if (ret)
953                         goto unlock_ret;
954                 list_add_tail(&vdev->list, &vi->vdev_list);
955 unlock_ret:
956                 mutex_unlock(&vi->vop_mutex);
957                 mutex_unlock(&vdev->vdev_mutex);
958 free_ret:
959                 kfree(dd_config);
960                 return ret;
961         }
962         case MIC_VIRTIO_COPY_DESC:
963         {
964                 struct mic_copy_desc copy;
965
966                 mutex_lock(&vdev->vdev_mutex);
967                 ret = vop_vdev_inited(vdev);
968                 if (ret)
969                         goto _unlock_ret;
970
971                 if (copy_from_user(&copy, argp, sizeof(copy))) {
972                         ret = -EFAULT;
973                         goto _unlock_ret;
974                 }
975
976                 ret = vop_virtio_copy_desc(vdev, &copy);
977                 if (ret < 0)
978                         goto _unlock_ret;
979                 if (copy_to_user(
980                         &((struct mic_copy_desc __user *)argp)->out_len,
981                         &copy.out_len, sizeof(copy.out_len)))
982                         ret = -EFAULT;
983 _unlock_ret:
984                 mutex_unlock(&vdev->vdev_mutex);
985                 return ret;
986         }
987         case MIC_VIRTIO_CONFIG_CHANGE:
988         {
989                 void *buf;
990
991                 mutex_lock(&vdev->vdev_mutex);
992                 ret = vop_vdev_inited(vdev);
993                 if (ret)
994                         goto __unlock_ret;
995                 buf = memdup_user(argp, vdev->dd->config_len);
996                 if (IS_ERR(buf)) {
997                         ret = PTR_ERR(buf);
998                         goto __unlock_ret;
999                 }
1000                 ret = vop_virtio_config_change(vdev, buf);
1001                 kfree(buf);
1002 __unlock_ret:
1003                 mutex_unlock(&vdev->vdev_mutex);
1004                 return ret;
1005         }
1006         default:
1007                 return -ENOIOCTLCMD;
1008         };
1009         return 0;
1010 }
1011
1012 /*
1013  * We return EPOLLIN | EPOLLOUT from poll when new buffers are enqueued, and
1014  * not when previously enqueued buffers may be available. This means that
1015  * in the card->host (TX) path, when userspace is unblocked by poll it
1016  * must drain all available descriptors or it can stall.
1017  */
1018 static __poll_t vop_poll(struct file *f, poll_table *wait)
1019 {
1020         struct vop_vdev *vdev = f->private_data;
1021         __poll_t mask = 0;
1022
1023         mutex_lock(&vdev->vdev_mutex);
1024         if (vop_vdev_inited(vdev)) {
1025                 mask = EPOLLERR;
1026                 goto done;
1027         }
1028         poll_wait(f, &vdev->waitq, wait);
1029         if (vop_vdev_inited(vdev)) {
1030                 mask = EPOLLERR;
1031         } else if (vdev->poll_wake) {
1032                 vdev->poll_wake = 0;
1033                 mask = EPOLLIN | EPOLLOUT;
1034         }
1035 done:
1036         mutex_unlock(&vdev->vdev_mutex);
1037         return mask;
1038 }
1039
1040 static inline int
1041 vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
1042                  unsigned long *size, unsigned long *pa)
1043 {
1044         struct vop_device *vpdev = vdev->vpdev;
1045         unsigned long start = MIC_DP_SIZE;
1046         int i;
1047
1048         /*
1049          * MMAP interface is as follows:
1050          * offset                               region
1051          * 0x0                                  virtio device_page
1052          * 0x1000                               first vring
1053          * 0x1000 + size of 1st vring           second vring
1054          * ....
1055          */
1056         if (!offset) {
1057                 *pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
1058                 *size = MIC_DP_SIZE;
1059                 return 0;
1060         }
1061
1062         for (i = 0; i < vdev->dd->num_vq; i++) {
1063                 struct vop_vringh *vvr = &vdev->vvr[i];
1064
1065                 if (offset == start) {
1066                         *pa = virt_to_phys(vvr->vring.va);
1067                         *size = vvr->vring.len;
1068                         return 0;
1069                 }
1070                 start += vvr->vring.len;
1071         }
1072         return -1;
1073 }
1074
1075 /*
1076  * Maps the device page and virtio rings to user space for readonly access.
1077  */
1078 static int vop_mmap(struct file *f, struct vm_area_struct *vma)
1079 {
1080         struct vop_vdev *vdev = f->private_data;
1081         unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
1082         unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
1083         int i, err;
1084
1085         err = vop_vdev_inited(vdev);
1086         if (err)
1087                 goto ret;
1088         if (vma->vm_flags & VM_WRITE) {
1089                 err = -EACCES;
1090                 goto ret;
1091         }
1092         while (size_rem) {
1093                 i = vop_query_offset(vdev, offset, &size, &pa);
1094                 if (i < 0) {
1095                         err = -EINVAL;
1096                         goto ret;
1097                 }
1098                 err = remap_pfn_range(vma, vma->vm_start + offset,
1099                                       pa >> PAGE_SHIFT, size,
1100                                       vma->vm_page_prot);
1101                 if (err)
1102                         goto ret;
1103                 size_rem -= size;
1104                 offset += size;
1105         }
1106 ret:
1107         return err;
1108 }
1109
1110 static const struct file_operations vop_fops = {
1111         .open = vop_open,
1112         .release = vop_release,
1113         .unlocked_ioctl = vop_ioctl,
1114         .poll = vop_poll,
1115         .mmap = vop_mmap,
1116         .owner = THIS_MODULE,
1117 };
1118
1119 int vop_host_init(struct vop_info *vi)
1120 {
1121         int rc;
1122         struct miscdevice *mdev;
1123         struct vop_device *vpdev = vi->vpdev;
1124
1125         INIT_LIST_HEAD(&vi->vdev_list);
1126         vi->dma_ch = vpdev->dma_ch;
1127         mdev = &vi->miscdev;
1128         mdev->minor = MISC_DYNAMIC_MINOR;
1129         snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index);
1130         mdev->name = vi->name;
1131         mdev->fops = &vop_fops;
1132         mdev->parent = &vpdev->dev;
1133
1134         rc = misc_register(mdev);
1135         if (rc)
1136                 dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc);
1137         return rc;
1138 }
1139
/*
 * Host side teardown: reset the devices, delete every virtio device still
 * on vi->vdev_list and deregister the misc device.
 *
 * Each device is unlinked under vop_mutex, then deleted under its own
 * vdev_mutex with vop_mutex dropped. The destroy completion is re-armed
 * before the device becomes invisible on the list and completed once the
 * deletion is done, so that a concurrent vop_release() (which waits on
 * the completion when it does not find the device) does not free the
 * device while it is still being torn down here.
 */
void vop_host_uninit(struct vop_info *vi)
{
        struct list_head *pos, *tmp;
        struct vop_vdev *vdev;

        mutex_lock(&vi->vop_mutex);
        vop_virtio_reset_devices(vi);
        list_for_each_safe(pos, tmp, &vi->vdev_list) {
                vdev = list_entry(pos, struct vop_vdev, list);
                list_del(pos);
                /* Re-arm before dropping vop_mutex; see vop_release() */
                reinit_completion(&vdev->destroy);
                /*
                 * NOTE(review): vop_mutex is released before vdev_mutex is
                 * taken, presumably because vop_release() and the ADD_DEVICE
                 * ioctl nest them the other way round (vdev_mutex, then
                 * vop_mutex) -- confirm.
                 */
                mutex_unlock(&vi->vop_mutex);
                mutex_lock(&vdev->vdev_mutex);
                vop_virtio_del_device(vdev);
                /* Tells vop_release() not to delete the device again */
                vdev->deleted = true;
                mutex_unlock(&vdev->vdev_mutex);
                /* Teardown finished; a waiting vop_release() may now free */
                complete(&vdev->destroy);
                mutex_lock(&vi->vop_mutex);
        }
        mutex_unlock(&vi->vop_mutex);
        misc_deregister(&vi->miscdev);
}